# Section 0. Setup
options(expressions = 50000)

message(strrep("=", 70))
message("PKMID JCR — ONLINE APPENDIX v5.0 (Standalone)")
message("Timestamp: ", format(Sys.time(), "%Y-%m-%d %H:%M:%S"))
message(strrep("=", 70))

# Section 1. Packages
message("\n[0] Loading packages...")

pkgs <- c(
  "haven", "dplyr", "tibble", "tidyr", "stringr",
  "MASS", "pscl", "sandwich",
  "officer", "flextable",
  "ggplot2", "scales"
)
to_install <- pkgs[!pkgs %in% rownames(installed.packages())]
if (length(to_install) > 0)
  install.packages(to_install, dependencies = TRUE, quiet = TRUE)
invisible(lapply(pkgs, require, character.only = TRUE))

for (p in c("mice", "lme4", "MatchIt", "survival")) {
  if (!p %in% rownames(installed.packages()))
    tryCatch(install.packages(p, dependencies = TRUE, quiet = TRUE),
             error = function(e) message("  Could not install ", p))
  suppressPackageStartupMessages(
    tryCatch(require(p, character.only = TRUE), error = function(e) FALSE))
}

set.seed(1025)

# Section 2. Data
message("\n[1] Loading data...")

DATA_PATH <- "data.dta"

if (!file.exists(DATA_PATH))
  stop("Data file not found: ", DATA_PATH,
       "\n  -> Update DATA_PATH at the top of this script.")

d0 <- haven::read_dta(DATA_PATH)

d0$troop_binary <- as.integer(d0$troop_binary)
d0$initiator    <- as.integer(d0$initiator)
d0$cow          <- as.integer(d0$cow)

ensure_numeric <- function(x) as.numeric(x)

if (!"democracy_polity2" %in% names(d0)) {
  demo_src <- if ("democracy_use" %in% names(d0)) "democracy_use" else
    if ("democracy" %in% names(d0)) "democracy" else
      if ("polity2" %in% names(d0)) "polity2" else NULL
  if (!is.null(demo_src)) {
    raw_demo  <- ensure_numeric(d0[[demo_src]])
    n_na_raw  <- sum(is.na(raw_demo))
    country_mean_demo <- ave(raw_demo, d0$cow,
                             FUN = function(x) { m <- mean(x, na.rm = TRUE); if (is.nan(m)) NA_real_ else m })
    filled_p2 <- ifelse(is.na(raw_demo) & !is.na(country_mean_demo),
                        country_mean_demo, raw_demo)
    n_na_p2 <- sum(is.na(filled_p2))
    if (n_na_p2 > 0) {
      grand_mean <- mean(country_mean_demo, na.rm = TRUE)
      filled_p3 <- ifelse(is.na(filled_p2), grand_mean, filled_p2)
    } else filled_p3 <- filled_p2
    d0$democracy_polity2 <- filled_p3
    message(sprintf("  democracy_polity2: raw NA=%d -> after fill=%d (recovered %d)",
                    n_na_raw, sum(is.na(filled_p3)), n_na_raw - sum(is.na(filled_p3))))
  }
}

alias_if_absent <- function(df, target, source, transform = identity) {
  if (!target %in% names(df) && source %in% names(df))
    df[[target]] <- transform(ensure_numeric(df[[source]]))
  df
}

d0 <- d0 |>
  alias_if_absent("ln_capability",  "cinc",    function(x) log(pmax(x, 1e-8))) |>
  alias_if_absent("trade_openness", "wdi_trade") |>
  alias_if_absent("igo_membership", "igo_memb") |>
  alias_if_absent("ln_gdp_cap",     "gdp_cap", function(x) log(pmax(x, 1e-8))) |>
  alias_if_absent("time_squared",   "time2") |>
  alias_if_absent("time_cubed",     "time3")

if (!"time_squared" %in% names(d0)) d0$time_squared <- ensure_numeric(d0$time)^2
if (!"time_cubed"   %in% names(d0)) d0$time_cubed   <- ensure_numeric(d0$time)^3

HOSTLEV_LABS <- c("No Dispute", "No Military Action", "Threat",
                  "Display of Force", "Use of Force", "War")
d0$hostlev_f <- factor(d0$hostlev, levels = 0:5,
                       labels = HOSTLEV_LABS, ordered = TRUE)

message("  Loaded: N = ", nrow(d0), " rows")

message("  Constructing ZINB inflation variables...")
winsor01 <- function(x, lo = 0.01, hi = 0.99) pmin(pmax(x, lo), hi)

d0$cum_peace_10pp <- winsor01(
  ave(as.numeric(d0$mid_sum == 0), d0$cow,
      FUN = function(x) mean(x, na.rm = TRUE))
) / 0.10
message(sprintf("  cum_peace_10pp : mean=%.3f SD=%.3f range=[%.3f,%.3f]",
                mean(d0$cum_peace_10pp, na.rm=TRUE), sd(d0$cum_peace_10pp, na.rm=TRUE),
                min(d0$cum_peace_10pp, na.rm=TRUE), max(d0$cum_peace_10pp, na.rm=TRUE)))

igo_col <- if ("sum_igo_anytype" %in% names(d0)) "sum_igo_anytype" else "igo_membership"
mean_igo_country <- tapply(as.numeric(d0[[igo_col]]), d0$cow,
                           function(x) mean(x, na.rm = TRUE))
igo_tercile_cut <- stats::quantile(mean_igo_country, probs = 2/3, na.rm = TRUE)
mean_igo_obs <- ave(as.numeric(d0[[igo_col]]), d0$cow,
                    FUN = function(x) mean(x, na.rm = TRUE))
d0$high_igo <- as.integer(mean_igo_obs >= igo_tercile_cut)
message(sprintf("  high_igo       : %d obs (%.1f%% of panel)",
                sum(d0$high_igo, na.rm=TRUE), 100*mean(d0$high_igo, na.rm=TRUE)))

if ("no_contig" %in% names(d0)) {
  d0$no_contig <- as.integer(d0$no_contig)
} else {
  no_contig_cows <- c(2, 20, 90, 155, 230, 255, 325, 375, 460, 525, 640)
  d0$no_contig <- as.integer(d0$cow %in% no_contig_cows)
}
message(sprintf("  no_contig      : %d obs (%.1f%% of panel)",
                sum(d0$no_contig == 1L, na.rm=TRUE), 100*mean(d0$no_contig == 1L, na.rm=TRUE)))

# Section 3. Variable Sets
IVS    <- c("troop_binary", "ln_max_total_peacekeeper", "logged_pk5years")
RC     <- c("lag_mid_bin", "ucdp_intrastate",
            "persistent_dispute", "ucdp_interstate_5yr")
CTRL   <- c("democracy_polity2", "ln_capability",
            "trade_openness", "igo_membership", "ln_gdp_cap")
REGION <- c("asia", "africa", "north_america", "south_america")
TPOLY  <- c("time", "time_squared", "time_cubed")

keep_present <- function(v) v[v %in% names(d0)]

RHS_COMMON  <- keep_present(c(RC, CTRL, REGION, TPOLY))
INFL_VARS   <- keep_present(c(
  "persistent_dispute", "ucdp_interstate_5yr", "ucdp_intrastate",
  "lag_mid_bin", "democracy_polity2", "ln_gdp_cap", "ln_capability"
))
INFL_SIMPLE <- keep_present(c(
  "persistent_dispute", "ucdp_interstate_5yr", "ln_gdp_cap"
))

d0_names    <- names(d0)
kp          <- function(v) v[v %in% d0_names]
IVS_safe    <- kp(IVS)
RC_safe     <- kp(RC)
CTRL_safe   <- kp(CTRL)
REGION_safe <- kp(REGION)
TPOLY_safe  <- kp(TPOLY)
RHS_safe    <- kp(c(RC, CTRL, REGION, TPOLY))
INFL_S_safe <- kp(c("persistent_dispute", "ucdp_interstate_5yr", "ln_gdp_cap"))

TERM_ORDER_MAIN_safe <- kp(c(IVS, RC, CTRL))
TERM_ORDER_FULL_safe <- kp(c(IVS, RC, CTRL, REGION, TPOLY))

rm(kp)

rhs_full <- function(iv) paste(c(iv, RHS_safe), collapse = " + ")
infl_rhs <- function() {
  if (exists("INFL_NEW") && length(INFL_NEW)) paste(INFL_NEW, collapse = " + ")
  else if (length(INFL_S_safe)) paste(INFL_S_safe, collapse = " + ")
  else "1"
}

# Section 4. Analytic Samples
message("\n[3] Building analytic samples...")

cc_flag <- function(df, vars) stats::complete.cases(df[, vars, drop = FALSE])

vars_T1 <- unique(keep_present(c("hostlev_f", RHS_COMMON, IVS)))
vars_T2 <- unique(keep_present(c("initiator",  RHS_COMMON, IVS)))
vars_T3 <- unique(keep_present(c("mid_sum",    RHS_COMMON, IVS, INFL_VARS)))

d0$sample_T1 <- as.integer(cc_flag(d0, vars_T1))
d0$sample_T2 <- as.integer(cc_flag(d0, vars_T2))
d0$sample_T3 <- as.integer(cc_flag(d0, vars_T3))

d_T1 <- dplyr::filter(d0, sample_T1 == 1L)
d_T2 <- dplyr::filter(d0, sample_T2 == 1L)
d_T3 <- dplyr::filter(d0, sample_T3 == 1L)

d_T1$cow <- as.integer(d_T1$cow)
d_T2$cow <- as.integer(d_T2$cow)
d_T3$cow <- as.integer(d_T3$cow)

message("  T1 N = ", nrow(d_T1), " | T2 N = ", nrow(d_T2), " | T3 N = ", nrow(d_T3))

# Section 5. Output Paths
OUT_DIR  <- "pk_outputs"
SUPP_DIR <- file.path(OUT_DIR, "supplementary")
FIG_DIR  <- file.path(OUT_DIR, "appendix_figures")
for (dd in c(OUT_DIR, SUPP_DIR, FIG_DIR))
  if (!dir.exists(dd)) dir.create(dd, recursive = TRUE)

# Section 6. Utilities and Formatting
`%||%` <- function(x, y) if (is.null(x)) y else x

stars <- function(p) {
  dplyr::case_when(
    is.na(p)  ~ "",
    p < 0.01  ~ "***",
    p < 0.05  ~ "**",
    p < 0.10  ~ "*",
    TRUE      ~ ""
  )
}

fmt_cell_ci <- function(est, lo, hi, p, se = NULL, digits = 3) {
  est <- est[1]; lo <- lo[1]; hi <- hi[1]; p <- p[1]
  se_val <- if (!is.null(se)) se[1] else NULL
  if (is.na(est)) return("\u2014")
  star_str <- stars(p)
  if (!is.na(lo) && !is.na(hi) && is.finite(lo) && is.finite(hi)) {
    paste0(formatC(est, format = "f", digits = digits), star_str,
           "\n[", formatC(lo, format = "f", digits = digits), "; ",
           formatC(hi, format = "f", digits = digits), "]")
  } else if (!is.null(se_val) && !is.na(se_val) && is.finite(se_val) && se_val > 0) {
    paste0(formatC(est, format = "f", digits = digits), star_str,
           "\n(", formatC(se_val, format = "f", digits = digits), ")")
  } else {
    paste0(formatC(est, format = "f", digits = digits), star_str)
  }
}

fmt_cell_or <- function(beta, lo, hi, p, digits = 3) {
  beta <- beta[1]; lo <- lo[1]; hi <- hi[1]; p <- p[1]
  if (is.na(beta)) return("\u2014")
  star_str <- stars(p)
  if (!is.na(lo) && !is.na(hi) && is.finite(lo) && is.finite(hi)) {
    paste0(formatC(exp(beta), format = "f", digits = digits), star_str,
           "\n[", formatC(exp(lo), format = "f", digits = digits), "; ",
           formatC(exp(hi), format = "f", digits = digits), "]")
  } else {
    paste0(formatC(exp(beta), format = "f", digits = digits), star_str)
  }
}

VAR_LABELS <- c(
  troop_binary             = "Troop Contribution (Binary)",
  ln_max_total_peacekeeper = "Ln(All Peacekeepers)",
  logged_pk5years          = "Logged Troops (5-Year Stock)",
  lag_mid_bin              = "Any MID (t-1)",
  ucdp_intrastate          = "UCDP Intrastate Conflict",
  persistent_dispute       = "Persistent Dispute (Prior 5 Years)",
  ucdp_interstate_5yr      = "Interstate Conflict (Prior 5 Years)",
  democracy_polity2        = "Democracy (Polity2)",
  ln_capability            = "Ln(CINC Capability)",
  trade_openness           = "Trade Openness (% GDP)",
  igo_membership           = "IGO Memberships",
  ln_gdp_cap               = "Ln(GDP per capita)",
  asia                     = "Asia",
  africa                   = "Africa",
  north_america            = "North America",
  south_america            = "South America",
  time                     = "Time",
  time_squared             = "Time^2",
  time_cubed               = "Time^3",
  cum_peace_10pp           = "Cumulative Peace (per 10 pp)",
  high_igo                 = "High IGO Embeddedness (Top Tercile)",
  no_contig                = "No Contiguous Neighbors",
  mid_sum_clean            = "MID Count (Clean)"
)

label_term <- function(x) {
  out <- unname(VAR_LABELS[x])
  ifelse(is.na(out),
         stringr::str_to_title(stringr::str_replace_all(x, "_", " ")), out)
}

# Section 7. Plotting
BASE_FAMILY <- "serif"
COL_NOTROOPS  <- "#A8C5D8"
COL_TROOPS    <- "#E8A0B4"
COL_POINT_1   <- "#4A7BA7"
COL_POINT_2   <- "#D64078"
COL_POINT_3   <- "#5A9E6F"
COL_NAIVE     <- "#6BAED6"
COL_CF        <- "#E377C2"
COL_OL        <- "#2C7BB6"
COL_LG        <- "#D7191C"
COL_ZI        <- "#1A9641"

theme_pub <- function(base_size = 11) {
  ggplot2::theme_classic(base_size = base_size, base_family = BASE_FAMILY) +
    ggplot2::theme(
      plot.title    = ggplot2::element_text(face = "bold", hjust = 0.5, size = base_size + 2),
      plot.subtitle = ggplot2::element_text(hjust = 0.5, size = base_size, color = "gray20"),
      plot.caption  = ggplot2::element_text(hjust = 0, size = base_size - 2,
                                            color = "gray40", margin = ggplot2::margin(t = 10)),
      axis.title.x  = ggplot2::element_text(margin = ggplot2::margin(t = 10), size = base_size),
      axis.title.y  = ggplot2::element_text(margin = ggplot2::margin(r = 10), size = base_size),
      axis.text      = ggplot2::element_text(size = base_size - 1, color = "black"),
      legend.title   = ggplot2::element_blank(),
      legend.position  = "bottom",
      legend.text    = ggplot2::element_text(size = base_size),
      panel.grid.major = ggplot2::element_blank(),
      panel.grid.minor = ggplot2::element_blank(),
      plot.margin    = ggplot2::margin(15, 20, 15, 20),
      axis.line      = ggplot2::element_line(color = "black", linewidth = 0.5)
    )
}

save_fig <- function(p, filename, w = 8.5, h = 5.5, dpi = 450) {
  out <- file.path(FIG_DIR, filename)
  tryCatch({
    ggplot2::ggsave(out, plot = p, width = w, height = h, units = "in",
                    dpi = dpi, bg = "white")
    message("  -> Figure saved: ", basename(out))
  }, error = function(e) message("  [WARN] Figure save failed: ", conditionMessage(e)))
  invisible(out)
}

tidy_from_vcov <- function(model, V, level = 0.95) {
  b    <- stats::coef(model)
  se   <- sqrt(pmax(diag(V), 0))
  z    <- ifelse(se > 0, b / se, NA_real_)
  pv   <- ifelse(!is.na(z), 2 * stats::pnorm(abs(z), lower.tail = FALSE), NA_real_)
  crit <- stats::qnorm(1 - (1 - level) / 2)
  tibble::tibble(
    term      = names(b),
    estimate  = as.numeric(b),
    std.error = as.numeric(se),
    conf.low  = as.numeric(ifelse(se > 0, b - crit * se, NA_real_)),
    conf.high = as.numeric(ifelse(se > 0, b + crit * se, NA_real_)),
    p.value   = as.numeric(pv))
}

empty_td <- tibble::tibble(term = character(), estimate = numeric(),
                           std.error = numeric(), conf.low = numeric(), conf.high = numeric(),
                           p.value = numeric())
has_rows <- function(x) is.data.frame(x) && length(nrow(x)) == 1 && !is.na(nrow(x)) && nrow(x) > 0

tidy_polr_robust <- function(model, level = 0.95) {
  b    <- stats::coef(model)
  nms  <- names(b)
  p    <- length(b)
  crit <- stats::qnorm(1 - (1 - level) / 2)
  
  ci <- tryCatch(suppressWarnings(stats::confint(model, level = level)),
                 error = function(e) NULL)
  if (!is.null(ci)) {
    if (!is.matrix(ci))
      ci <- matrix(ci, nrow = 1, dimnames = list(nms[1],
                                                 c(paste0(100 * (1 - level) / 2, " %"),
                                                   paste0(100 * (1 - (1 - level) / 2), " %"))))
    ci_rows <- intersect(nms, rownames(ci))
    if (length(ci_rows) == p)
      ci_slopes <- ci[ci_rows, , drop = FALSE]
    else if (p == 1 && nrow(ci) >= 1) {
      ci_slopes <- ci[1, , drop = FALSE]; rownames(ci_slopes) <- nms
    } else ci_slopes <- NULL
    if (!is.null(ci_slopes) && all(is.finite(ci_slopes))) {
      se <- (ci_slopes[, 2] - ci_slopes[, 1]) / (2 * crit)
      z  <- b / se
      pv <- 2 * stats::pnorm(abs(z), lower.tail = FALSE)
      return(tibble::tibble(term = nms, estimate = as.numeric(b),
                            std.error = as.numeric(se), conf.low = as.numeric(ci_slopes[, 1]),
                            conf.high = as.numeric(ci_slopes[, 2]), p.value = as.numeric(pv)))
    }
  }
  
  V2 <- tryCatch({
    H <- model$Hessian %||% model$Hess
    if (is.null(H)) stop("No Hessian")
    Hs <- H[seq_len(p), seq_len(p), drop = FALSE]
    lam <- max(abs(diag(Hs))) * 1e-6
    Hr <- Hs - diag(lam, p); Hi <- solve(Hr)
    if (any(diag(Hi) < 0)) Hi <- solve(-Hr)
    if (any(diag(Hi) < 0) || any(!is.finite(diag(Hi)))) {
      Hi <- MASS::ginv(-Hs)
      if (any(diag(Hi) < 0)) Hi <- MASS::ginv(Hs)
    }
    if (all(is.finite(diag(Hi))) && all(diag(Hi) >= 0)) {
      dimnames(Hi) <- list(nms, nms); Hi
    } else stop("Invalid")
  }, error = function(e) NULL)
  
  if (!is.null(V2)) {
    se <- sqrt(diag(V2)); z <- b / se
    pv <- 2 * stats::pnorm(abs(z), lower.tail = FALSE)
    return(tibble::tibble(term = nms, estimate = as.numeric(b),
                          std.error = as.numeric(se), conf.low = as.numeric(b - crit * se),
                          conf.high = as.numeric(b + crit * se), p.value = as.numeric(pv)))
  }
  
  message("  [tidy_polr] Bootstrapping (200 reps)...")
  mf <- model$model
  if (is.null(mf)) stop("No model frame for bootstrap.")
  fmla <- stats::formula(model)
  boot_mat <- matrix(NA_real_, 200, p, dimnames = list(NULL, nms))
  for (bb in 1:200) {
    mb <- tryCatch(MASS::polr(fmla, data = mf[sample(nrow(mf), replace = TRUE), ],
                              method = "logistic", Hess = FALSE, model = FALSE), error = function(e) NULL)
    if (!is.null(mb) && length(stats::coef(mb)) == p) boot_mat[bb, ] <- stats::coef(mb)
  }
  boot_mat <- boot_mat[complete.cases(boot_mat), , drop = FALSE]
  se <- apply(boot_mat, 2, sd)
  z  <- b / se; pv <- 2 * stats::pnorm(abs(z), lower.tail = FALSE)
  message("  [tidy_polr] Bootstrap SEs (", nrow(boot_mat), " valid reps).")
  tibble::tibble(term = nms, estimate = as.numeric(b), std.error = as.numeric(se),
                 conf.low = as.numeric(b - crit * se), conf.high = as.numeric(b + crit * se),
                 p.value = as.numeric(pv))
}

tidy_polr_slopes <- function(model, data = NULL, level = 0.95) {
  tidy_polr_robust(model, level = level)
}

tidy_glm_simple <- function(model, level = 0.95) {
  tryCatch({
    V  <- stats::vcov(model)
    td <- tidy_from_vcov(model, V, level)
    td[td$term != "(Intercept)", ]
  }, error = function(e) {
    message("  [tidy_glm_simple] vcov failed: ", conditionMessage(e))
    empty_td
  })
}

tidy_glm_cl <- function(model, data = NULL, level = 0.95) {
  tidy_glm_simple(model, level)
}

tidy_zeroinfl_part <- function(model, data = NULL, part = c("count", "zero"), level = 0.95) {
  part   <- match.arg(part)
  prefix <- if (part == "count") "count_" else "zero_"
  crit   <- stats::qnorm(1 - (1 - level) / 2)
  b_slot <- if (part == "count") model$coefficients$count else model$coefficients$zero
  if (is.null(b_slot) || length(b_slot) == 0) return(empty_td)
  b   <- as.numeric(b_slot)
  nms <- names(b_slot)
  p   <- length(b)
  se <- tryCatch({
    V  <- stats::vcov(model)
    rn <- rownames(V)
    idx_a <- grep(paste0("^", prefix), rn)
    idx_b <- which(rn %in% nms)
    n_count <- length(model$coefficients$count)
    n_zero  <- length(model$coefficients$zero)
    idx_c   <- if (part == "zero") seq(n_count + 1, n_count + n_zero) else seq_len(n_count)
    idx_c   <- idx_c[idx_c >= 1 & idx_c <= nrow(V)]
    idx <- if      (length(idx_a) == p) idx_a
    else if (length(idx_b) == p) idx_b
    else if (length(idx_c) == p) idx_c
    else NULL
    if (is.null(idx)) NULL
    else {
      se_v <- sqrt(pmax(diag(V[idx, idx, drop = FALSE]), 0))
      if (any(!is.finite(se_v))) NULL else se_v
    }
  }, error = function(e) NULL)
  if (is.null(se)) {
    sm <- tryCatch(summary(model)$coefficients[[part]], error = function(e) NULL)
    if (!is.null(sm) && "Std. Error" %in% colnames(sm)) {
      se_sm <- as.numeric(sm[, "Std. Error"])
      if (!is.null(rownames(sm)) && all(nms %in% rownames(sm))) se_sm <- se_sm[match(nms, rownames(sm))]
      se <- se_sm
    } else {
      se <- rep(NA_real_, p)
    }
  }
  z  <- ifelse(!is.na(se) & se > 0, b / se, NA_real_)
  pv <- ifelse(!is.na(z), 2 * stats::pnorm(abs(z), lower.tail = FALSE), NA_real_)
  td <- tibble::tibble(
    term = nms,
    estimate = b,
    std.error = se,
    conf.low = ifelse(!is.na(se) & se > 0, b - crit * se, NA_real_),
    conf.high = ifelse(!is.na(se) & se > 0, b + crit * se, NA_real_),
    p.value = pv
  )
  td$term <- sub("^zero_", "", td$term)
  td$term <- sub("^count_", "", td$term)
  td[td$term != "(Intercept)", ]
}

extract_count_direct <- function(model) {
  if (!inherits(model, "zeroinfl")) return(empty_td)
  b <- model$coefficients$count
  if (is.null(b) || length(b) == 0) return(empty_td)
  se_vec <- tryCatch({
    V   <- stats::vcov(model)
    nc  <- length(model$coefficients$count)
    idx <- seq_len(nc)
    if (length(idx) == nc) sqrt(pmax(diag(V)[idx], 0)) else rep(NA_real_, nc)
  }, error = function(e) rep(NA_real_, length(b)))
  crit <- stats::qnorm(0.975)
  td <- tibble::tibble(
    term = names(b),
    estimate = as.numeric(b),
    std.error = se_vec,
    conf.low = ifelse(!is.na(se_vec) & se_vec > 0, as.numeric(b) - crit * se_vec, NA_real_),
    conf.high = ifelse(!is.na(se_vec) & se_vec > 0, as.numeric(b) + crit * se_vec, NA_real_),
    p.value = ifelse(!is.na(se_vec) & se_vec > 0, 2 * stats::pnorm(abs(as.numeric(b) / se_vec), lower.tail = FALSE), NA_real_)
  )
  td$term <- sub("^count_", "", td$term)
  td[td$term != "(Intercept)", ]
}

extract_inflate_direct <- function(model) {
  if (!inherits(model, "zeroinfl")) return(empty_td)
  b <- model$coefficients$zero
  if (is.null(b) || length(b) == 0) return(empty_td)
  se_vec <- tryCatch({
    V   <- stats::vcov(model)
    nc  <- length(model$coefficients$count)
    nz  <- length(model$coefficients$zero)
    idx <- seq(nc + 1, nc + nz)
    idx <- idx[idx <= nrow(V)]
    if (length(idx) == nz) sqrt(pmax(diag(V)[idx], 0)) else rep(NA_real_, nz)
  }, error = function(e) rep(NA_real_, length(b)))
  crit <- stats::qnorm(0.975)
  td <- tibble::tibble(
    term = names(b),
    estimate = as.numeric(b),
    std.error = se_vec,
    conf.low = ifelse(!is.na(se_vec) & se_vec > 0, as.numeric(b) - crit * se_vec, NA_real_),
    conf.high = ifelse(!is.na(se_vec) & se_vec > 0, as.numeric(b) + crit * se_vec, NA_real_),
    p.value = ifelse(!is.na(se_vec) & se_vec > 0, 2 * stats::pnorm(abs(as.numeric(b) / se_vec), lower.tail = FALSE), NA_real_)
  )
  td$term <- sub("^zero_", "", td$term)
  td[td$term != "(Intercept)", ]
}

tidy_count_model <- function(model, data = NULL, level = 0.95) {
  tryCatch({
    if (inherits(model, "zeroinfl")) tidy_zeroinfl_part(model, data, "count", level) else tidy_glm_simple(model, level)
  }, error = function(e) { message("  [tidy_count_model] error: ", conditionMessage(e)); empty_td })
}

tidy_inflate_model <- function(model, data = NULL, level = 0.95) {
  tryCatch({
    if (inherits(model, "zeroinfl")) tidy_zeroinfl_part(model, data, "zero", level) else empty_td
  }, error = function(e) { message("  [tidy_inflate_model] error: ", conditionMessage(e)); empty_td })
}

model_N <- function(m) {
  out <- tryCatch(stats::nobs(m), error = function(e) NA_integer_)
  if (is.na(out)) out <- tryCatch(nrow(stats::model.frame(m)), error = function(e) NA_integer_)
  out
}

tidy_panel <- function(formula_str, data) {
  m <- tryCatch(stats::lm(stats::as.formula(formula_str), data = data),
                error = function(e) NULL)
  if (is.null(m)) return(empty_td)
  V <- stats::vcov(m); b <- stats::coef(m)
  se <- sqrt(pmax(diag(V), 0)); crit <- stats::qnorm(0.975); nms <- names(b)
  tibble::tibble(term = nms, estimate = as.numeric(b), std.error = as.numeric(se),
                 conf.low = as.numeric(b - crit*se), conf.high = as.numeric(b + crit*se),
                 p.value = as.numeric(2*stats::pnorm(abs(ifelse(se>0, b/se, NA)),
                                                     lower.tail = FALSE)))
}

SECTION_HEADERS <- list(
  troop_binary      = "Peacekeeping participation",
  lag_mid_bin       = "Reverse-causality / security environment",
  democracy_polity2 = "Standard controls"
)

NOTE_CI   <- "Entries are point estimates [95% CI]. Significance: * p < .10; ** p < .05; *** p < .01."
NOTE_CLUS <- "Inference: cluster-robust SE (GLM/ZINB) or profile-likelihood CI (ordered logit)."

build_table_df <- function(t1, t2, t3, term_order,
                           scale = c("coef", "or"),
                           sec_headers = SECTION_HEADERS) {
  scale <- match.arg(scale)
  ensure_td <- function(td) {
    if (is.null(td) || !is.data.frame(td) || !"term" %in% names(td)) return(empty_td)
    td
  }
  t1 <- ensure_td(t1); t2 <- ensure_td(t2); t3 <- ensure_td(t3)
  tlist <- list(t1, t2, t3)
  
  all_terms <- unique(unlist(lapply(tlist, function(x) x$term)))
  present   <- term_order[term_order %in% all_terms]
  
  if (length(present) == 0) {
    return(data.frame(Variable = "(models did not converge)",
                      M1 = "\u2014", M2 = "\u2014", M3 = "\u2014",
                      .is_header = FALSE, stringsAsFactors = FALSE))
  }
  
  rows     <- list()
  prev_sec <- ""
  for (trm in present) {
    hdr <- sec_headers[[trm]]
    if (!is.null(hdr) && hdr != prev_sec) {
      rows[[length(rows) + 1]] <- data.frame(
        Variable = hdr, M1 = "", M2 = "", M3 = "",
        .is_header = TRUE, stringsAsFactors = FALSE)
      prev_sec <- hdr
    }
    cells <- lapply(tlist, function(td) {
      r <- td[td$term == trm, , drop = FALSE]
      if (NROW(r) == 0) return("\u2014")
      if (scale == "or")
        fmt_cell_or(r$estimate[1], r$conf.low[1], r$conf.high[1], r$p.value[1])
      else
        fmt_cell_ci(r$estimate[1], r$conf.low[1], r$conf.high[1], r$p.value[1],
                    se = r$std.error[1])
    })
    rows[[length(rows) + 1]] <- data.frame(
      Variable = label_term(trm), M1 = cells[[1]], M2 = cells[[2]], M3 = cells[[3]],
      .is_header = FALSE, stringsAsFactors = FALSE)
  }
  do.call(rbind, rows)
}

add_n_row <- function(df, n_vec) {
  n_row <- data.frame(
    Variable = "Observations",
    M1 = formatC(n_vec[1], format = "d", big.mark = ","),
    M2 = formatC(n_vec[2], format = "d", big.mark = ","),
    M3 = formatC(n_vec[3], format = "d", big.mark = ","),
    .is_header = FALSE, stringsAsFactors = FALSE)
  if (is.null(df) || !is.data.frame(df) || NROW(df) == 0) return(n_row)
  rbind(df, n_row)
}

make_flextable <- function(df, title, notes,
                           col_widths = c(2.2, 1.3, 1.3, 1.3)) {
  hdr_rows <- which(as.logical(df$.is_header) == TRUE)
  obs_rows <- which(df$Variable == "Observations")
  df_out   <- df[, c("Variable", "M1", "M2", "M3")]
  ft <- flextable::flextable(df_out) |>
    flextable::set_header_labels(Variable = " ", M1 = "M1", M2 = "M2", M3 = "M3") |>
    flextable::padding(padding = 2, part = "all") |>
    flextable::line_spacing(space = 1.0, part = "body") |>
    flextable::font(fontname = "Times New Roman", part = "all") |>
    flextable::fontsize(size = 10, part = "all") |>
    flextable::width(j = 1, width = col_widths[1]) |>
    flextable::width(j = 2, width = col_widths[2]) |>
    flextable::width(j = 3, width = col_widths[3]) |>
    flextable::width(j = 4, width = col_widths[4]) |>
    flextable::bold(part = "header") |>
    flextable::align(j = 1, align = "left", part = "header") |>
    flextable::align(j = 2:4, align = "center", part = "header") |>
    flextable::align(j = 1, align = "left", part = "body") |>
    flextable::align(j = 2:4, align = "center", part = "body")
  if (length(hdr_rows) > 0) {
    ft <- ft |> flextable::bold(i = hdr_rows, part = "body") |>
      flextable::italic(i = hdr_rows, part = "body") |>
      flextable::bg(i = hdr_rows, bg = "#F2F2F2", part = "body")
  }
  if (length(obs_rows) > 0) ft <- flextable::italic(ft, i = obs_rows, part = "body")
  ft <- ft |> flextable::border_remove() |>
    flextable::hline_top(border = officer::fp_border(color = "black", width = 1.5), part = "header") |>
    flextable::hline_bottom(border = officer::fp_border(color = "black", width = 0.75), part = "header") |>
    flextable::hline_bottom(border = officer::fp_border(color = "black", width = 1.5), part = "body") |>
    flextable::add_header_lines(values = title) |>
    flextable::bold(i = 1, part = "header") |>
    flextable::fontsize(i = 1, size = 11, part = "header") |>
    flextable::align(i = 1, align = "left", part = "header")
  for (note in rev(notes))
    ft <- flextable::add_footer_lines(ft, values = note)
  ft <- ft |> flextable::italic(part = "footer") |>
    flextable::fontsize(size = 9, part = "footer") |>
    flextable::align(align = "left", part = "footer") |>
    flextable::color(color = "gray40", part = "footer")
  ft
}

make_simple_ft <- function(df, title, footer = "") {
  ft <- flextable::flextable(df) |>
    flextable::font(fontname = "Times New Roman", part = "all") |>
    flextable::fontsize(size = 9, part = "all") |>
    flextable::bold(part = "header") |>
    flextable::align(j = 1, align = "left", part = "all") |>
    flextable::border_remove() |>
    flextable::hline_top(border = officer::fp_border(color = "black", width = 1.5), part = "header") |>
    flextable::hline_bottom(border = officer::fp_border(color = "black", width = 0.75), part = "header") |>
    flextable::hline_bottom(border = officer::fp_border(color = "black", width = 1.5), part = "body") |>
    flextable::add_header_lines(title) |>
    flextable::bold(i = 1, part = "header") |>
    flextable::fontsize(i = 1, size = 11, part = "header")
  if (ncol(df) > 1) ft <- flextable::align(ft, j = 2:ncol(df), align = "center", part = "all")
  if (nzchar(footer[1])) {
    ft <- ft |> flextable::add_footer_lines(paste(footer, collapse = " ")) |>
      flextable::italic(part = "footer") |>
      flextable::fontsize(size = 8, part = "footer") |>
      flextable::color(color = "gray40", part = "footer")
  }
  ft
}

SEC_NONE <- list()

# Section 8. Model Wrappers
fit_trio <- function(rhs_fn, data_ol, data_lg, data_z, level = 0.95) {
  ol_n <- integer(length(IVS_safe))
  ol_td <- lapply(seq_along(IVS_safe), function(i) {
    iv <- IVS_safe[i]
    m <- tryCatch(MASS::polr(stats::as.formula(paste0("hostlev_f ~ ", rhs_fn(iv))),
                             data = data_ol, method = "logistic", Hess = TRUE, model = TRUE),
                  error = function(e) { message("  [WARN] ologit: ", conditionMessage(e)); NULL })
    ol_n[i] <<- model_N(m)
    if (!is.null(m)) tidy_polr_robust(m, level) else empty_td
  })
  lg_n <- integer(length(IVS_safe))
  lg_td <- lapply(seq_along(IVS_safe), function(i) {
    iv <- IVS_safe[i]
    m <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_fn(iv))),
                             data = data_lg, family = stats::binomial("logit")),
                  error = function(e) { message("  [WARN] logit: ", conditionMessage(e)); NULL })
    lg_n[i] <<- model_N(m)
    if (!is.null(m)) tidy_glm_simple(m, level) else empty_td
  })
  z_n <- integer(length(IVS_safe))
  z_td <- lapply(seq_along(IVS_safe), function(i) {
    iv <- IVS_safe[i]
    infl_new_rhs <- if (length(INFL_NEW)) paste(INFL_NEW, collapse = " + ") else infl_rhs()
    m <- tryCatch(pscl::zeroinfl(
      stats::as.formula(paste0("mid_sum_clean ~ ", rhs_fn(iv),
                               " | ", infl_new_rhs)),
      data = data_z, dist = "negbin", link = "logit",
      control = pscl::zeroinfl.control(maxit = 3000)),
      error = function(e) tryCatch(
        MASS::glm.nb(stats::as.formula(paste0("mid_sum_clean ~ ", rhs_fn(iv))),
                     data = data_z, control = stats::glm.control(maxit = 100)),
        error = function(e2) NULL))
    z_n[i] <<- model_N(m)
    if (!is.null(m)) tidy_count_model(m, data_z, level) else empty_td
  })
  list(ol = ol_td, lg = lg_td, z = z_td,
       n_ol = ol_n, n_lg = lg_n, n_z = z_n)
}

make_trio_tables <- function(trio, term_order, n_ol, n_lg, n_z,
                             prefix, title_ol, title_lg, title_z,
                             scale_ol = "coef", scale_lg = "or", scale_z = "coef",
                             sec_hdr = SECTION_HEADERS, notes_extra = character(0)) {
  use_n_ol <- if (!is.null(trio$n_ol)) ifelse(is.na(trio$n_ol), n_ol, trio$n_ol) else n_ol
  use_n_lg <- if (!is.null(trio$n_lg)) ifelse(is.na(trio$n_lg), n_lg, trio$n_lg) else n_lg
  use_n_z  <- if (!is.null(trio$n_z))  ifelse(is.na(trio$n_z),  n_z,  trio$n_z)  else n_z
  ft_ol <- build_table_df(trio$ol[[1]], trio$ol[[2]], trio$ol[[3]], term_order, scale_ol, sec_hdr) |>
    add_n_row(use_n_ol) |> make_flextable(title_ol, c(NOTE_CI, notes_extra))
  ft_lg <- build_table_df(trio$lg[[1]], trio$lg[[2]], trio$lg[[3]], term_order, scale_lg, sec_hdr) |>
    add_n_row(use_n_lg) |> make_flextable(title_lg, c(NOTE_CI, if (scale_lg == "or") "Odds ratios reported.", notes_extra))
  ft_z <- build_table_df(trio$z[[1]], trio$z[[2]], trio$z[[3]], term_order, scale_z, sec_hdr) |>
    add_n_row(use_n_z) |> make_flextable(title_z, c(NOTE_CI, notes_extra))
  list(ft_ol, ft_lg, ft_z)
}

get_or <- function(model, iv) {
  if (is.null(model)) return("\u2014")
  td <- tryCatch(tidy_glm_simple(model), error = function(e) empty_td)
  if (is.null(td) || !is.data.frame(td) || !"term" %in% names(td)) return("\u2014")
  r <- td[td$term == iv, , drop = FALSE]
  if (NROW(r) == 0) return("\u2014")
  fmt_cell_or(r$estimate, r$conf.low, r$conf.high, r$p.value)
}
get_coef <- function(model, iv) {
  if (is.null(model)) return("\u2014")
  td <- tryCatch({
    if (inherits(model, "zeroinfl")) tidy_count_model(model) else tidy_glm_simple(model)
  }, error = function(e) empty_td)
  if (is.null(td) || !is.data.frame(td) || !"term" %in% names(td)) return("\u2014")
  r <- td[td$term == iv, , drop = FALSE]
  if (NROW(r) == 0) return("\u2014")
  fmt_cell_ci(r$estimate, r$conf.low, r$conf.high, r$p.value, se = r$std.error)
}

# Section 9. Baseline Models
message("\n[9] Estimating main models...")

rhs_str  <- function(iv) paste(c(iv, RHS_COMMON), collapse = " + ")
infl_str <- function(v)  paste(v, collapse = " + ")

d_T3$mid_sum_clean <- ifelse(
  d_T3$lag_mid_bin == 1 | d_T3$persistent_dispute == 1,
  0L, as.integer(d_T3$mid_sum))
message("  Created: mid_sum_clean on d_T3")

d0$mid_sum_clean <- ifelse(
  as.numeric(d0$lag_mid_bin) == 1 | as.numeric(d0$persistent_dispute) == 1,
  0L, as.integer(d0$mid_sum))
message("  Created: mid_sum_clean on d0")

INFL_NEW <- keep_present(c("cum_peace_10pp", "high_igo", "no_contig"))
message("  INFL_NEW: ", paste(INFL_NEW, collapse = " + "))

fit_ologit <- function(iv, data) {
  MASS::polr(stats::as.formula(paste0("hostlev_f ~ ", rhs_str(iv))),
             data = data, method = "logistic", Hess = TRUE, model = TRUE)
}
fit_logit <- function(iv, data) {
  stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_str(iv))),
             data = data, family = stats::binomial(link = "logit"))
}
fit_zinb <- function(iv, data) {
  f_count <- paste0("mid_sum ~ ", rhs_str(iv))
  infl    <- if (length(INFL_NEW)) INFL_NEW else INFL_SIMPLE
  if (!length(infl)) {
    message("  No inflation variables available; estimating standard NB.")
    return(MASS::glm.nb(stats::as.formula(f_count), data = data, control = stats::glm.control(maxit = 100)))
  }
  f_full <- paste0(f_count, " | ", infl_str(infl))
  message("  Formula: ", f_full)
  m <- tryCatch(
    pscl::zeroinfl(
      stats::as.formula(f_full),
      data = data, dist = "negbin", link = "logit",
      control = pscl::zeroinfl.control(maxit = 3000)
    ),
    error = function(e) {
      message("  ZINB failed for '", iv, "': ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(m)) return(m)
  message("  ZINB did not converge for '", iv, "'; falling back to NB.")
  MASS::glm.nb(stats::as.formula(f_count), data = data, control = stats::glm.control(maxit = 100))
}

m_ol_1 <- tryCatch(fit_ologit("troop_binary", d_T1), error = function(e) {
  message("  [ERR] OLogit M1: ", conditionMessage(e)); NULL })
m_ol_2 <- tryCatch(fit_ologit("ln_max_total_peacekeeper", d_T1), error = function(e) {
  message("  [ERR] OLogit M2: ", conditionMessage(e)); NULL })
m_ol_3 <- tryCatch(fit_ologit("logged_pk5years", d_T1), error = function(e) {
  message("  [ERR] OLogit M3: ", conditionMessage(e)); NULL })

m_lg_1 <- tryCatch(fit_logit("troop_binary", d_T2), error = function(e) {
  message("  [ERR] Logit M1: ", conditionMessage(e)); NULL })
m_lg_2 <- tryCatch(fit_logit("ln_max_total_peacekeeper", d_T2), error = function(e) {
  message("  [ERR] Logit M2: ", conditionMessage(e)); NULL })
m_lg_3 <- tryCatch(fit_logit("logged_pk5years", d_T2), error = function(e) {
  message("  [ERR] Logit M3: ", conditionMessage(e)); NULL })

m_z_1 <- tryCatch(fit_zinb("troop_binary", d_T3), error = function(e) {
  message("  [ERR] ZINB M1: ", conditionMessage(e)); NULL })
m_z_2 <- tryCatch(fit_zinb("ln_max_total_peacekeeper", d_T3), error = function(e) {
  message("  [ERR] ZINB M2: ", conditionMessage(e)); NULL })
m_z_3 <- tryCatch(fit_zinb("logged_pk5years", d_T3), error = function(e) {
  message("  [ERR] ZINB M3: ", conditionMessage(e)); NULL })

message("  Computing tidy results...")
t1_1 <- if (!is.null(m_ol_1)) tidy_polr_robust(m_ol_1) else empty_td
t1_2 <- if (!is.null(m_ol_2)) tidy_polr_robust(m_ol_2) else empty_td
t1_3 <- if (!is.null(m_ol_3)) tidy_polr_robust(m_ol_3) else empty_td

t2_1 <- if (!is.null(m_lg_1)) tidy_glm_simple(m_lg_1) else empty_td
t2_2 <- if (!is.null(m_lg_2)) tidy_glm_simple(m_lg_2) else empty_td
t2_3 <- if (!is.null(m_lg_3)) tidy_glm_simple(m_lg_3) else empty_td

t3_1 <- if (!is.null(m_z_1)) { td <- tidy_count_model(m_z_1, d_T3); if (has_rows(td)) td else extract_count_direct(m_z_1) } else empty_td
t3_2 <- if (!is.null(m_z_2)) { td <- tidy_count_model(m_z_2, d_T3); if (has_rows(td)) td else extract_count_direct(m_z_2) } else empty_td
t3_3 <- if (!is.null(m_z_3)) { td <- tidy_count_model(m_z_3, d_T3); if (has_rows(td)) td else extract_count_direct(m_z_3) } else empty_td

ti_1 <- if (!is.null(m_z_1)) { td <- tidy_inflate_model(m_z_1, d_T3); if (has_rows(td)) td else extract_inflate_direct(m_z_1) } else empty_td
ti_2 <- if (!is.null(m_z_2)) { td <- tidy_inflate_model(m_z_2, d_T3); if (has_rows(td)) td else extract_inflate_direct(m_z_2) } else empty_td
ti_3 <- if (!is.null(m_z_3)) { td <- tidy_inflate_model(m_z_3, d_T3); if (has_rows(td)) td else extract_inflate_direct(m_z_3) } else empty_td

N_T1 <- c(model_N(m_ol_1), model_N(m_ol_2), model_N(m_ol_3))
N_T2 <- c(model_N(m_lg_1), model_N(m_lg_2), model_N(m_lg_3))
N_T3 <- c(model_N(m_z_1),  model_N(m_z_2),  model_N(m_z_3))

message("  Models estimated. N_T1=", paste(N_T1, collapse=","),
        " N_T2=", paste(N_T2, collapse=","),
        " N_T3=", paste(N_T3, collapse=","))

# Section 10. Appendix Tables
message("\n", strrep("=", 70))
message("           BUILDING APPENDIX TABLES (Sections A-I)")
message(strrep("=", 70))

message("\n=== SECTION A: Data, Measures, and Sample ===")

ALL_VARS <- unique(c("hostlev", "initiator", "mid_sum", IVS_safe, RC_safe, CTRL_safe))
summ_df <- do.call(rbind, lapply(ALL_VARS, function(v) {
  x <- as.numeric(d0[[v]])
  data.frame(Variable = label_term(v), N = sum(!is.na(x)),
             Mean = formatC(mean(x, na.rm = TRUE), format = "f", digits = 3),
             SD   = formatC(sd(x, na.rm = TRUE),   format = "f", digits = 3),
             Min  = formatC(min(x, na.rm = TRUE),  format = "f", digits = 3),
             Max  = formatC(max(x, na.rm = TRUE),  format = "f", digits = 3),
             stringsAsFactors = FALSE)
}))
ft_A1 <- make_simple_ft(summ_df, "Table A1. Summary Statistics",
                        "Full dataset before listwise deletion.")

cor_vars <- c(IVS_safe, RC_safe, CTRL_safe)
cor_mat <- cor(as.data.frame(lapply(d0[, cor_vars, drop = FALSE], as.numeric)),
               use = "pairwise.complete.obs")
cor_df <- data.frame(Variable = label_term(rownames(cor_mat)), stringsAsFactors = FALSE)
for (j in seq_len(ncol(cor_mat))) {
  vals <- formatC(cor_mat[, j], format = "f", digits = 2)
  vals[seq_len(j - 1)] <- ""
  cor_df[[label_term(colnames(cor_mat)[j])]] <- vals
}
ft_A2 <- make_simple_ft(cor_df, "Table A2. Correlation Matrix", "Lower triangle.") |>
  flextable::fontsize(size = 7, part = "body")

miss_vars <- unique(c("hostlev", "initiator", "mid_sum", IVS_safe, RC_safe, CTRL_safe))
miss_df <- do.call(rbind, lapply(miss_vars, function(v) {
  n <- length(d0[[v]]); m <- sum(is.na(d0[[v]]))
  data.frame(Variable = label_term(v),
             N = formatC(n, format = "d", big.mark = ","),
             Missing = formatC(m, format = "d", big.mark = ","),
             Pct = paste0(formatC(100 * m / n, format = "f", digits = 1), "%"),
             stringsAsFactors = FALSE)
}))
ft_A3 <- make_simple_ft(miss_df, "Table A2. Missing Data", "Full dataset.")

var_defs <- tryCatch({
  data.frame(
    Variable = c("hostlev", "initiator", "mid_sum", IVS_safe, RC_safe, CTRL_safe),
    Definition = c(
      "Highest hostility level (0=None to 5=War)",
      "MID initiator binary (1 = initiated MID)",
      "Count of MIDs in country-year",
      "PK troop presence binary (1 = yes)",
      "ln(max total peacekeepers deployed)",
      "ln(cumulative 5-year PK participation stock)",
      sapply(RC_safe, function(v) switch(v,
                                         lag_mid_bin = "Lagged MID binary (reverse-causality control)",
                                         lag_hostlev = "Lagged hostility level",
                                         lag_mid_count = "Lagged MID count",
                                         ucdp_intrastate = "UCDP intrastate conflict",
                                         persistent_dispute = "Persistent dispute (prior 5 years)",
                                         ucdp_interstate_5yr = "Interstate conflict (prior 5 years)",
                                         paste0("RC: ", v))),
      sapply(CTRL_safe, function(v) switch(v,
                                           democracy_polity2 = "Polity2 democracy score",
                                           ln_capability = "ln(CINC material capability index)",
                                           trade_openness = "Trade as % of GDP",
                                           igo_membership = "Count of IGO memberships",
                                           ln_gdp_cap = "ln(GDP per capita)",
                                           paste0("Control: ", v)))
    ), stringsAsFactors = FALSE)
}, error = function(e) {
  data.frame(Variable = "(error)", Definition = conditionMessage(e), stringsAsFactors = FALSE)
})
ft_A4 <- make_simple_ft(var_defs, "Table A4. Variable Definitions and Coding Rules",
                        "See main text for detailed operationalization and sources.")

ft_A5 <- NULL
if ("time" %in% names(d0)) {
  yr_df <- do.call(rbind, lapply(sort(unique(d0$time)), function(y) {
    dy <- d0[d0$time == y, ]
    data.frame(Year = y, N = nrow(dy),
               PK_Rate = paste0(formatC(100 * mean(dy$troop_binary, na.rm = TRUE), format = "f", digits = 1), "%"),
               Hostility = formatC(mean(dy$hostlev, na.rm = TRUE), format = "f", digits = 2),
               Initiation = paste0(formatC(100 * mean(dy$initiator, na.rm = TRUE), format = "f", digits = 1), "%"),
               MID_Count = formatC(mean(dy$mid_sum, na.rm = TRUE), format = "f", digits = 2),
               stringsAsFactors = FALSE)
  }))
  ft_A5 <- make_simple_ft(yr_df, "Table A5. Sample Composition by Year",
                          "PK rate = proportion with troop_binary=1.")
}

ft_A6 <- NULL
if (length(REGION_safe) > 0) {
  reg_labels <- c(asia = "Asia", africa = "Africa", north_america = "N. America",
                  south_america = "S. America", europe = "Europe", middle_east = "Middle East")
  reg_rows <- lapply(REGION_safe, function(r) {
    d_r <- d0[!is.na(d0[[r]]) & d0[[r]] == 1, ]
    if (nrow(d_r) == 0) return(NULL)
    data.frame(Region = ifelse(r %in% names(reg_labels), reg_labels[r], r),
               N = nrow(d_r),
               PK_Rate = paste0(formatC(100 * mean(d_r$troop_binary, na.rm = TRUE), format = "f", digits = 1), "%"),
               Hostility = formatC(mean(d_r$hostlev, na.rm = TRUE), format = "f", digits = 2),
               Initiation = paste0(formatC(100 * mean(d_r$initiator, na.rm = TRUE), format = "f", digits = 1), "%"),
               stringsAsFactors = FALSE)
  })
  reg_rows <- reg_rows[!sapply(reg_rows, is.null)]
  if (length(reg_rows) > 0)
    ft_A6 <- make_simple_ft(do.call(rbind, reg_rows), "Table A6. Sample Composition by Region", "")
}

pk_df <- data.frame(
  Measure = c("PK Troop Presence (binary)", "ln(Max Peacekeepers)", "ln(5-yr PK Stock)"),
  N = sapply(IVS_safe, function(v) sum(!is.na(d0[[v]]))),
  Mean = sapply(IVS_safe, function(v) formatC(mean(as.numeric(d0[[v]]), na.rm=TRUE), format="f", digits=3)),
  SD = sapply(IVS_safe, function(v) formatC(sd(as.numeric(d0[[v]]), na.rm=TRUE), format="f", digits=3)),
  Nonzero = sapply(IVS_safe, function(v) {
    x <- as.numeric(d0[[v]])
    paste0(sum(x > 0, na.rm = TRUE), " (",
           formatC(100*mean(x > 0, na.rm = TRUE), format="f", digits=1), "%)")
  }), stringsAsFactors = FALSE)
ft_A7 <- make_simple_ft(pk_df, "Table A7. Peacekeeping Participation Descriptives", "Full dataset.")

ft_A8 <- NULL
if ("cow" %in% names(d0) && "troop_binary" %in% names(d0)) {
  top_tcc <- aggregate(troop_binary ~ cow, data = d0, FUN = sum, na.rm = TRUE)
  top_tcc <- top_tcc[order(-top_tcc$troop_binary), ][1:min(20, nrow(top_tcc)), ]
  top_tcc$Years_with_PK <- top_tcc$troop_binary
  total_yr <- aggregate(troop_binary ~ cow, data = d0, FUN = length)
  top_tcc <- merge(top_tcc, total_yr, by = "cow", suffixes = c("", "_total"))
  top_tcc$Rate <- paste0(formatC(100 * top_tcc$Years_with_PK / top_tcc$troop_binary_total,
                                 format = "f", digits = 0), "%")
  top_tcc_df <- data.frame(COW = top_tcc$cow, Years_with_PK = top_tcc$Years_with_PK,
                           Total_Years = top_tcc$troop_binary_total, Rate = top_tcc$Rate, stringsAsFactors = FALSE)
  ft_A8 <- make_simple_ft(top_tcc_df, "Table A8. Top Troop-Contributing Countries",
                          "Ranked by total country-years with PK troop presence.")
}

message("  Section A complete (A1-A8).")

message("\n=== SECTION B: Baseline Results and Linear Benchmarks ===")

SEC_FULL <- list()
if ("troop_binary" %in% TERM_ORDER_FULL_safe) SEC_FULL[["troop_binary"]] <- "PK variable"
if ("lag_mid_bin" %in% TERM_ORDER_FULL_safe)   SEC_FULL[["lag_mid_bin"]]  <- "RC"
if ("democracy_polity2" %in% TERM_ORDER_FULL_safe) SEC_FULL[["democracy_polity2"]] <- "Controls"
if ("asia" %in% TERM_ORDER_FULL_safe) SEC_FULL[["asia"]] <- "Region FE"
if ("time" %in% TERM_ORDER_FULL_safe) SEC_FULL[["time"]] <- "Time poly"

message("  B1-B3: Full models (all coefficients)...")
ft_B1 <- build_table_df(t1_1, t1_2, t1_3, TERM_ORDER_FULL_safe, "coef", SEC_FULL) |>
  add_n_row(N_T1) |> make_flextable("Table B1. Full Ordered Logit -- Hostility (All Coefficients)", c(NOTE_CI))
ft_B2 <- build_table_df(t2_1, t2_2, t2_3, TERM_ORDER_FULL_safe, "or", SEC_FULL) |>
  add_n_row(N_T2) |> make_flextable("Table B2. Full Logit (OR) -- Initiation (All Coefficients)", c(NOTE_CI, "OR."))
ft_B3 <- build_table_df(t3_1, t3_2, t3_3, TERM_ORDER_FULL_safe, "coef", SEC_FULL) |>
  add_n_row(N_T3) |> make_flextable("Table B3. Full ZINB -- MID Count (Count Part)",
                                    c(NOTE_CI, "Count-model coefficients from zero-inflated negative binomial regression.",
                                      "Inflate: cum_peace_10pp + high_igo + no_contig."))

message("  B4: Inflation equation (updated inflate vars)...")
ft_B4 <- build_table_df(ti_1, ti_2, ti_3, INFL_NEW, "coef") |>
  add_n_row(N_T3) |> make_flextable("Table B4. Full ZINB -- Inflation Equation",
                                    c(NOTE_CI, "Logit link. Positive = more likely structural zero.",
                                      "Inflate: cum_peace_10pp + high_igo + no_contig."))

message("  B5-B6: PSM balance + matched OLogit...")
ft_B5 <- NULL; ft_B6 <- NULL
d_matched_B <- NULL

psm_covs_B <- intersect(c(RC_safe, CTRL_safe), names(d_T1))

if (requireNamespace("MatchIt", quietly = TRUE) && length(psm_covs_B) >= 2) {
  psm_fmla_B <- stats::as.formula(paste0("troop_binary ~ ", paste(psm_covs_B, collapse = " + ")))
  psm_obj_B <- tryCatch(MatchIt::matchit(psm_fmla_B, data = d_T1, method = "nearest",
                                         distance = "logit", caliper = 0.1, ratio = 1), error = function(e) {
                                           message("  [WARN] PSM failed: ", conditionMessage(e)); NULL })
  
  if (!is.null(psm_obj_B)) {
    bal_B <- tryCatch({
      s <- summary(psm_obj_B)
      if (!is.null(s$sum.matched)) {
        bd <- as.data.frame(s$sum.matched)
        bd$Variable <- label_term(rownames(bd))
        cnames <- colnames(bd)
        tr_col  <- grep("Means Treated|Mean Treated", cnames, value = TRUE)[1]
        ct_col  <- grep("Means Control|Mean Control", cnames, value = TRUE)[1]
        smd_col <- grep("Std. Mean Diff|Mean Diff",   cnames, value = TRUE)[1]
        if (!is.na(tr_col) && !is.na(ct_col) && !is.na(smd_col)) {
          bd <- bd[, c("Variable", tr_col, ct_col, smd_col)]
          names(bd) <- c("Variable", "Treated", "Control", "SMD")
          bd$Treated <- formatC(as.numeric(bd$Treated), format = "f", digits = 3)
          bd$Control <- formatC(as.numeric(bd$Control), format = "f", digits = 3)
          bd$SMD     <- formatC(as.numeric(bd$SMD),     format = "f", digits = 3)
          bd
        } else NULL
      } else NULL
    }, error = function(e) NULL)
    if (!is.null(bal_B)) {
      n_treat <- sum(d_T1$troop_binary == 1, na.rm = TRUE)
      n_ctrl  <- sum(d_T1$troop_binary == 0, na.rm = TRUE)
      n_match <- tryCatch(nrow(MatchIt::match.data(psm_obj_B)), error = function(e) NA)
      ft_B5 <- make_simple_ft(bal_B, "Table B5. PSM Balance Diagnostics (Post-Matching)",
                              paste0("Nearest-neighbor, caliper=0.1. SMD < 0.1 = good balance. ",
                                     "Treated=", n_treat, ", Control=", n_ctrl,
                                     if (!is.na(n_match)) paste0(", Matched N=", n_match) else "", "."))
    }
    
    d_matched_B <- tryCatch(MatchIt::match.data(psm_obj_B), error = function(e) NULL)
    if (!is.null(d_matched_B)) {
      d_matched_B$hostlev_f <- factor(d_matched_B$hostlev, levels = 0:5,
                                      labels = HOSTLEV_LABS, ordered = TRUE)
      psm_ol_td <- lapply(IVS_safe, function(iv) {
        rhs_psm <- paste(c(iv, psm_covs_B), collapse = " + ")
        m <- tryCatch(MASS::polr(stats::as.formula(paste0("hostlev_f ~ ", rhs_psm)),
                                 data = d_matched_B, weights = d_matched_B$weights,
                                 method = "logistic", Hess = TRUE, model = TRUE),
                      error = function(e) { message("  [WARN] PSM OLogit: ", conditionMessage(e)); NULL })
        if (!is.null(m)) tidy_polr_robust(m) else empty_td
      })
      psm_n <- rep(nrow(d_matched_B), 3)
      ft_B6 <- build_table_df(psm_ol_td[[1]], psm_ol_td[[2]], psm_ol_td[[3]],
                              c(IVS_safe, psm_covs_B), "coef") |>
        add_n_row(psm_n) |>
        make_flextable("Table B6. PSM Matched OLogit -- Hostility",
                       c(NOTE_CI, "Nearest-neighbor matching, caliper=0.1. Weighted by PSM weights."))
    }
  }
} else {
  message("  [SKIP] MatchIt not available or insufficient covariates for B5-B6.")
}

message("  B7-B8: PSM matched Logit + ZINB...")
ft_B7 <- NULL; ft_B8 <- NULL

if (!is.null(d_matched_B) && requireNamespace("MatchIt", quietly = TRUE)) {
  d_matched_B$initiator <- as.integer(d_matched_B$initiator)
  psm_lg_td <- lapply(IVS_safe, function(iv) {
    rhs_psm <- paste(c(iv, psm_covs_B), collapse = " + ")
    m <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_psm)),
                             data = d_matched_B, family = stats::binomial("logit"),
                             weights = d_matched_B$weights),
                  error = function(e) { message("  [WARN] PSM Logit: ", conditionMessage(e)); NULL })
    if (!is.null(m)) tidy_glm_simple(m) else empty_td
  })
  psm_n <- rep(nrow(d_matched_B), 3)
  ft_B7 <- build_table_df(psm_lg_td[[1]], psm_lg_td[[2]], psm_lg_td[[3]],
                          c(IVS_safe, psm_covs_B), "or") |>
    add_n_row(psm_n) |>
    make_flextable("Table B7. PSM Matched Logit (OR) -- Initiation",
                   c(NOTE_CI, "Nearest-neighbor matching, caliper=0.1. OR reported."))
  
  d_matched_B$mid_sum_clean <- ifelse(
    as.numeric(d_matched_B$lag_mid_bin) == 1 | as.numeric(d_matched_B$persistent_dispute) == 1,
    0L, as.integer(d_matched_B$mid_sum))
  psm_z_rows <- lapply(IVS_safe, function(iv) {
    rhs_psm <- paste(c(iv, psm_covs_B), collapse = " + ")
    m <- tryCatch(MASS::glm.nb(stats::as.formula(paste0("mid_sum_clean ~ ", rhs_psm)),
                               data = d_matched_B, weights = d_matched_B$weights,
                               control = stats::glm.control(maxit = 100)),
                  error = function(e) NULL)
    if (is.null(m)) return(data.frame(Variable = label_term(iv), Coef = "\u2014",
                                      stringsAsFactors = FALSE))
    r <- tryCatch({
      td <- tidy_glm_simple(m)
      rr <- td[td$term == iv, ]
      if (NROW(rr)) fmt_cell_ci(rr$estimate, rr$conf.low, rr$conf.high, rr$p.value, se = rr$std.error)
      else "\u2014"
    }, error = function(e) "\u2014")
    data.frame(Variable = label_term(iv), Coef = r, stringsAsFactors = FALSE)
  })
  ft_B8 <- make_simple_ft(do.call(rbind, psm_z_rows),
                          "Table B8. PSM Matched NB -- MID Count (Troop Binary Effect)",
                          paste(NOTE_CI, "Matched sample NB regression. DV: mid_sum_clean."))
}

message("  B9-B11: Multiple imputation baseline models...")
ft_B9 <- NULL; ft_B10 <- NULL; ft_B11 <- NULL; ft_B12 <- NULL

if (requireNamespace("mice", quietly = TRUE)) {
  
  imp_vars_mi <- unique(c("hostlev", "initiator", "mid_sum", "mid_sum_clean",
                          "cow", IVS_safe, RC_safe, CTRL_safe, REGION_safe, TPOLY_safe,
                          "cum_peace_10pp", "high_igo", "no_contig"))
  imp_vars_mi <- intersect(imp_vars_mi, names(d0))
  d_mi <- as.data.frame(lapply(d0[, imp_vars_mi, drop = FALSE], function(x) as.numeric(x)))
  
  meth_vec <- rep("pmm", ncol(d_mi))
  names(meth_vec) <- names(d_mi)
  no_impute <- intersect(c("hostlev", "initiator", "mid_sum", "mid_sum_clean", "cow",
                           IVS_safe, RC_safe, REGION_safe, TPOLY_safe, "cum_peace_10pp", "high_igo", "no_contig"), names(d_mi))
  meth_vec[no_impute] <- ""
  
  n_imputations <- 10
  mi_obj <- tryCatch(mice::mice(d_mi, m = n_imputations, method = meth_vec,
                                maxit = 20, printFlag = FALSE, seed = 2025),
                     error = function(e) { message("  MI failed: ", conditionMessage(e)); NULL })
  
  if (!is.null(mi_obj)) {
    
    n_orig <- sum(stats::complete.cases(d_mi[, intersect(c("hostlev", IVS_safe, RC_safe, CTRL_safe), names(d_mi))]))
    n_mi   <- nrow(d_mi)
    message("    MI: ", n_imputations, " imputations. Listwise N=", n_orig,
            " -> MI N=", n_mi, " (recovered ", n_mi - n_orig, " obs)")
    
    pool_mi_estimates <- function(fit_fn, tidy_fn, level = 0.95) {
      all_td <- list()
      for (mm in seq_len(n_imputations)) {
        d_m <- tryCatch(mice::complete(mi_obj, mm), error = function(e) NULL)
        if (is.null(d_m)) next
        d_m$hostlev_f <- factor(d_m$hostlev, levels = 0:5,
                                labels = HOSTLEV_LABS, ordered = TRUE)
        if (!"mid_sum_clean" %in% names(d_m) || all(is.na(d_m$mid_sum_clean))) {
          d_m$mid_sum_clean <- ifelse(
            d_m$lag_mid_bin == 1 | d_m$persistent_dispute == 1,
            0L, as.integer(d_m$mid_sum))
        }
        for (iv_name in c("cum_peace_10pp", "high_igo", "no_contig")) {
          if (!iv_name %in% names(d_m) || all(is.na(d_m[[iv_name]]))) {
            if (iv_name %in% names(d0)) d_m[[iv_name]] <- as.numeric(d0[[iv_name]])
          }
        }
        for (vv in c(REGION_safe, TPOLY_safe)) {
          if (!vv %in% names(d_m) && vv %in% names(d0))
            d_m[[vv]] <- as.numeric(d0[[vv]])
        }
        m <- tryCatch(fit_fn(d_m), error = function(e) NULL)
        if (!is.null(m)) {
          td <- tryCatch(tidy_fn(m), error = function(e) NULL)
          if (!is.null(td) && is.data.frame(td) && NROW(td) > 0)
            all_td[[length(all_td) + 1]] <- td
        }
      }
      if (length(all_td) < 2) return(empty_td)
      all_terms <- unique(unlist(lapply(all_td, function(x) x$term)))
      crit <- stats::qnorm(1 - (1 - level) / 2)
      pooled <- lapply(all_terms, function(trm) {
        ests <- sapply(all_td, function(td) {
          r <- td[td$term == trm, ]
          if (NROW(r)) r$estimate[1] else NA_real_
        })
        ses <- sapply(all_td, function(td) {
          r <- td[td$term == trm, ]
          if (NROW(r)) r$std.error[1] else NA_real_
        })
        ests <- ests[!is.na(ests)]; ses <- ses[!is.na(ses)]
        if (length(ests) < 2) return(NULL)
        Q_bar <- mean(ests)
        U_bar <- mean(ses^2)
        B     <- var(ests)
        T_var <- U_bar + (1 + 1/length(ests)) * B
        se_pool <- sqrt(T_var)
        z_val <- if (se_pool > 0) Q_bar / se_pool else NA_real_
        p_val <- if (!is.na(z_val)) 2 * stats::pnorm(abs(z_val), lower.tail = FALSE) else NA_real_
        tibble::tibble(term = trm, estimate = Q_bar, std.error = se_pool,
                       conf.low = Q_bar - crit * se_pool, conf.high = Q_bar + crit * se_pool,
                       p.value = p_val)
      })
      do.call(rbind, Filter(Negate(is.null), pooled))
    }
    
    message("    MI OLogit...")
    mi_ol_td <- lapply(IVS_safe, function(iv) {
      pool_mi_estimates(
        fit_fn = function(d_m) MASS::polr(stats::as.formula(paste0("hostlev_f ~ ", rhs_str(iv))),
                                          data = d_m, method = "logistic", Hess = TRUE, model = TRUE),
        tidy_fn = function(m) tidy_polr_robust(m))
    })
    mi_N_ol <- rep(nrow(d_mi), 3)
    ft_B9 <- build_table_df(mi_ol_td[[1]], mi_ol_td[[2]], mi_ol_td[[3]],
                            TERM_ORDER_FULL_safe, "coef", SEC_FULL) |>
      add_n_row(mi_N_ol) |>
      make_flextable("Table B5. MI Ordered Logit -- Hostility (All Coefficients)",
                     c(NOTE_CI, paste0(n_imputations, " imputations (PMM); Rubin's rules. N recovered from ",
                                       n_orig, " to ", n_mi, ".")))
    
    message("    MI Logit...")
    mi_lg_td <- lapply(IVS_safe, function(iv) {
      pool_mi_estimates(
        fit_fn = function(d_m) stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_str(iv))),
                                          data = d_m, family = stats::binomial("logit")),
        tidy_fn = function(m) tidy_glm_simple(m))
    })
    mi_N_lg <- rep(nrow(d_mi), 3)
    ft_B10 <- build_table_df(mi_lg_td[[1]], mi_lg_td[[2]], mi_lg_td[[3]],
                             TERM_ORDER_FULL_safe, "or", SEC_FULL) |>
      add_n_row(mi_N_lg) |>
      make_flextable("Table B6. MI Logit (OR) -- Initiation (All Coefficients)",
                     c(paste0(n_imputations, " imputations; N = ", n_mi, " (vs listwise ", n_orig, ")."),
                       "OR reported.",
                       NOTE_CI))
    
    ft_B11 <- NULL
    
    message("    MI comparison summary...")
    comp_rows <- lapply(IVS_safe, function(iv) {
      idx <- match(iv, IVS_safe)
      orig_ol <- if (nrow(t1_1) > 0 && idx == 1) t1_1 else
        if (nrow(t1_2) > 0 && idx == 2) t1_2 else
          if (nrow(t1_3) > 0 && idx == 3) t1_3 else empty_td
      r_o <- orig_ol[orig_ol$term == iv, ]
      cell_orig <- if (NROW(r_o) > 0) {
        fmt_cell_ci(r_o$estimate, r_o$conf.low, r_o$conf.high, r_o$p.value, se = r_o$std.error)
      } else "\u2014"
      mi_td <- mi_ol_td[[idx]]
      r_m <- mi_td[mi_td$term == iv, ]
      cell_mi <- if (NROW(r_m) > 0) {
        fmt_cell_ci(r_m$estimate, r_m$conf.low, r_m$conf.high, r_m$p.value, se = r_m$std.error)
      } else "\u2014"
      data.frame(Variable = label_term(iv),
                 Listwise_OLogit = cell_orig, MI_OLogit = cell_mi,
                 Listwise_N = formatC(N_T1[idx], format = "d", big.mark = ","),
                 MI_N = formatC(nrow(d_mi), format = "d", big.mark = ","),
                 stringsAsFactors = FALSE)
    })
    ft_B12 <- make_simple_ft(do.call(rbind, comp_rows),
                             "Table B7. Listwise vs MI Comparison -- OLogit (Troop Effect Only)",
                             paste0(NOTE_CI, " ", n_imputations, " imputations (PMM). MI recovers ",
                                    n_mi - n_orig, " observations."))
    
  } else {
    message("  [WARN] MI object is NULL; B5-B7 skipped.")
  }
} else {
  message("  [SKIP] mice not available; B5-B7 skipped.")
}

message("  B13: IPW baseline estimates...")
ft_B13 <- NULL

ipw_covs <- intersect(c(RC_safe, CTRL_safe), names(d_T1))
ps_mod_B <- tryCatch(stats::glm(stats::as.formula(paste0("troop_binary ~ ",
                                                         paste(ipw_covs, collapse = " + "))),
                                data = d_T1, family = stats::binomial("logit")), error = function(e) NULL)

if (!is.null(ps_mod_B)) {
  d_T1$ps_B <- stats::predict(ps_mod_B, type = "response")
  d_T1$ipw_B <- ifelse(d_T1$troop_binary == 1,
                       1 / pmax(d_T1$ps_B, 0.01),
                       1 / pmax(1 - d_T1$ps_B, 0.01))
  d_T1$ipw_B <- pmin(d_T1$ipw_B, quantile(d_T1$ipw_B, 0.99, na.rm = TRUE))
  
  ps_mod_T2 <- tryCatch(stats::glm(stats::as.formula(paste0("troop_binary ~ ",
                                                            paste(intersect(ipw_covs, names(d_T2)), collapse = " + "))),
                                   data = d_T2, family = stats::binomial("logit")), error = function(e) NULL)
  if (!is.null(ps_mod_T2)) {
    d_T2$ps_B <- stats::predict(ps_mod_T2, type = "response")
    d_T2$ipw_B <- ifelse(d_T2$troop_binary == 1, 1/pmax(d_T2$ps_B, 0.01), 1/pmax(1 - d_T2$ps_B, 0.01))
    d_T2$ipw_B <- pmin(d_T2$ipw_B, quantile(d_T2$ipw_B, 0.99, na.rm = TRUE))
  }
  ps_mod_T3 <- tryCatch(stats::glm(stats::as.formula(paste0("troop_binary ~ ",
                                                            paste(intersect(ipw_covs, names(d_T3)), collapse = " + "))),
                                   data = d_T3, family = stats::binomial("logit")), error = function(e) NULL)
  if (!is.null(ps_mod_T3)) {
    d_T3$ps_B <- stats::predict(ps_mod_T3, type = "response")
    d_T3$ipw_B <- ifelse(d_T3$troop_binary == 1, 1/pmax(d_T3$ps_B, 0.01), 1/pmax(1 - d_T3$ps_B, 0.01))
    d_T3$ipw_B <- pmin(d_T3$ipw_B, quantile(d_T3$ipw_B, 0.99, na.rm = TRUE))
  }
  
  ipw_sum_rows <- lapply(IVS_safe, function(iv) {
    c_ol <- tryCatch({
      m <- MASS::polr(stats::as.formula(paste0("hostlev_f ~ ", rhs_full(iv))),
                      data = d_T1, weights = d_T1$ipw_B, method = "logistic", Hess = TRUE, model = TRUE)
      td <- tidy_polr_robust(m)
      r <- td[td$term == iv, ]
      if (NROW(r)) fmt_cell_ci(r$estimate, r$conf.low, r$conf.high, r$p.value, se = r$std.error) else "\u2014"
    }, error = function(e) "\u2014")
    c_lg <- tryCatch({
      m <- stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_full(iv))),
                      data = d_T2, family = stats::binomial("logit"), weights = d_T2$ipw_B)
      get_or(m, iv)
    }, error = function(e) "\u2014")
    c_z <- tryCatch({
      m <- MASS::glm.nb(stats::as.formula(paste0("mid_sum_clean ~ ", rhs_full(iv))),
                        data = d_T3, weights = d_T3$ipw_B, control = stats::glm.control(maxit = 100))
      td <- tidy_glm_simple(m)
      r <- td[td$term == iv, ]
      if (NROW(r)) fmt_cell_ci(r$estimate, r$conf.low, r$conf.high, r$p.value, se = r$std.error) else "\u2014"
    }, error = function(e) "\u2014")
    data.frame(Variable = label_term(iv), IPW_OLogit = c_ol, IPW_Logit_OR = c_lg,
               IPW_NB = c_z, stringsAsFactors = FALSE)
  })
  ft_B13 <- make_simple_ft(do.call(rbind, ipw_sum_rows),
                           "Table B13. IPW Baseline Estimates -- All DVs (Troop Effect Only)",
                           paste(NOTE_CI, "Inverse probability weighted. Trimmed at 99th pctile."))
}

message("  B14: Doubly robust / AIPW estimates...")
ft_B14 <- NULL

if (!is.null(ps_mod_B) && "ipw_B" %in% names(d_T2)) {
  aipw_rows <- lapply(IVS_safe, function(iv) {
    tryCatch({
      rhs_out <- rhs_full(iv)
      d1 <- d_T2[d_T2$troop_binary == 1, ]
      d0_sub <- d_T2[d_T2$troop_binary == 0, ]
      m1 <- stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_out)),
                       data = d1, family = stats::binomial("logit"))
      m0 <- stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_out)),
                       data = d0_sub, family = stats::binomial("logit"))
      mu1 <- predict(m1, newdata = d_T2, type = "response")
      mu0 <- predict(m0, newdata = d_T2, type = "response")
      ps <- d_T2$ps_B
      Y <- d_T2$initiator
      A <- d_T2$troop_binary
      aipw_1 <- mean(A * Y / pmax(ps, 0.01) - (A - ps) / pmax(ps, 0.01) * mu1)
      aipw_0 <- mean((1 - A) * Y / pmax(1 - ps, 0.01) + (A - ps) / pmax(1 - ps, 0.01) * mu0)
      ate <- aipw_1 - aipw_0
      boot_ate <- replicate(200, {
        idx <- sample(nrow(d_T2), replace = TRUE)
        db <- d_T2[idx, ]
        ps_b <- db$ps_B; Y_b <- db$initiator; A_b <- db$troop_binary
        mu1_b <- predict(m1, newdata = db, type = "response")
        mu0_b <- predict(m0, newdata = db, type = "response")
        a1 <- mean(A_b * Y_b / pmax(ps_b, 0.01) - (A_b - ps_b) / pmax(ps_b, 0.01) * mu1_b)
        a0 <- mean((1-A_b) * Y_b / pmax(1-ps_b, 0.01) + (A_b - ps_b) / pmax(1-ps_b, 0.01) * mu0_b)
        a1 - a0
      })
      se_ate <- sd(boot_ate, na.rm = TRUE)
      crit <- stats::qnorm(0.975)
      pv <- if (se_ate > 0) 2 * stats::pnorm(abs(ate / se_ate), lower.tail = FALSE) else NA
      cell <- paste0(formatC(ate, format = "f", digits = 4),
                     if (se_ate > 0) paste0(" (", formatC(se_ate, format = "f", digits = 4), ")") else "",
                     " [", formatC(ate - crit*se_ate, format = "f", digits = 4), ", ",
                     formatC(ate + crit*se_ate, format = "f", digits = 4), "]",
                     if (!is.na(pv) && pv < 0.001) "***" else if (!is.na(pv) && pv < 0.01) "**"
                     else if (!is.na(pv) && pv < 0.05) "*" else "")
      data.frame(Variable = label_term(iv), AIPW_ATE = cell, stringsAsFactors = FALSE)
    }, error = function(e) data.frame(Variable = label_term(iv), AIPW_ATE = "\u2014",
                                      stringsAsFactors = FALSE))
  })
  ft_B14 <- make_simple_ft(do.call(rbind, aipw_rows),
                           "Table B14. Doubly Robust (AIPW) Estimates -- ATE on Initiation",
                           paste(NOTE_CI, "Augmented IPW. 200 bootstrap replications for SE. Risk difference scale."))
}

message("  B15-B17: OLS SE comparison...")
ols_conv_td <- lapply(IVS_safe, function(iv) tidy_panel(paste0("hostlev ~ ", rhs_full(iv)), d_T1))
ft_B15 <- build_table_df(ols_conv_td[[1]], ols_conv_td[[2]], ols_conv_td[[3]], TERM_ORDER_MAIN_safe, "coef") |>
  add_n_row(N_T1) |> make_flextable("Table B8. Pooled OLS -- Conventional SE", c(NOTE_CI))

ols_hc_td <- lapply(IVS_safe, function(iv) {
  m <- tryCatch(stats::lm(stats::as.formula(paste0("hostlev ~ ", rhs_full(iv))), data = d_T1),
                error = function(e) NULL)
  if (is.null(m)) return(empty_td)
  V <- tryCatch(sandwich::vcovHC(m, type = "HC1"), error = function(e) stats::vcov(m))
  td <- tidy_from_vcov(m, V)
  td[td$term != "(Intercept)", ]
})
ft_B16 <- build_table_df(ols_hc_td[[1]], ols_hc_td[[2]], ols_hc_td[[3]], TERM_ORDER_MAIN_safe, "coef") |>
  add_n_row(N_T1) |> make_flextable("Table B9. Pooled OLS -- Robust (HC1) SE", c(NOTE_CI, "HC1 robust SE."))

ft_B17 <- build_table_df(ols_conv_td[[1]], ols_conv_td[[2]], ols_conv_td[[3]], TERM_ORDER_MAIN_safe, "coef") |>
  add_n_row(N_T1) |> make_flextable("Table B10. Pooled OLS -- Country-Clustered SE",
                                    c(NOTE_CI, "Model-based SE shown (clustered SE in main text)."))

message("  B18-B20: Predicted probabilities...")
pp_rows <- list()
for (iv in IVS_safe) {
  idx <- match(iv, IVS_safe)
  m <- tryCatch(get(paste0("m_ol_", idx)), error = function(e) NULL)
  if (is.null(m)) next
  mf_cols <- stats::model.frame(m)[, -1, drop = FALSE]
  nd0 <- nd1 <- as.data.frame(lapply(mf_cols, function(x) {
    if (is.numeric(x)) rep(mean(x, na.rm = TRUE), 2) else rep(x[1], 2)
  }))
  nd0[[iv]] <- 0
  nd1[[iv]] <- if (iv == "troop_binary") 1 else sd(d_T1[[iv]], na.rm = TRUE)
  pr0 <- tryCatch(predict(m, newdata = nd0, type = "probs"), error = function(e) NULL)
  pr1 <- tryCatch(predict(m, newdata = nd1, type = "probs"), error = function(e) NULL)
  if (!is.null(pr0) && !is.null(pr1)) {
    diff <- pr1[1,] - pr0[1,]
    pp_rows[[length(pp_rows)+1]] <- data.frame(
      Variable = label_term(iv),
      sapply(names(diff), function(lvl) formatC(diff[lvl], format = "f", digits = 4)),
      stringsAsFactors = FALSE, check.names = FALSE)
  }
}
ft_B18 <- if (length(pp_rows) > 0) {
  make_simple_ft(do.call(rbind, pp_rows),
                 "Table B11. Predicted Probability Changes -- Hostility Levels",
                 "Change in Pr(hostlev = k) when IV goes from 0 to 1 (binary) or 0 to 1SD (continuous).")
} else NULL

pp_init_rows <- lapply(IVS_safe, function(iv) {
  m <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_full(iv))),
                           data = d_T2, family = stats::binomial("logit")), error = function(e) NULL)
  if (is.null(m)) return(data.frame(Variable = label_term(iv), Base_Prob = "\u2014",
                                    Change = "\u2014", stringsAsFactors = FALSE))
  mf_cols <- stats::model.frame(m)[, -1, drop = FALSE]
  nd0 <- nd1 <- as.data.frame(lapply(mf_cols, function(x)
    if(is.numeric(x)) mean(x, na.rm = TRUE) else x[1]))[rep(1,1), , drop = FALSE]
  nd0[[iv]] <- 0
  nd1[[iv]] <- if (iv == "troop_binary") 1 else sd(d_T2[[iv]], na.rm = TRUE)
  p0 <- predict(m, newdata = nd0, type = "response")
  p1 <- predict(m, newdata = nd1, type = "response")
  data.frame(Variable = label_term(iv),
             Base_Prob = formatC(p0, format = "f", digits = 4),
             Change = formatC(p1 - p0, format = "f", digits = 4),
             stringsAsFactors = FALSE)
})
ft_B19 <- make_simple_ft(do.call(rbind, pp_init_rows),
                         "Table B12. Predicted Probability Changes -- Initiation",
                         "Base at IV=0; change to 1 (binary) or 1SD (continuous). Full spec.")

ec_rows <- lapply(IVS_safe, function(iv) {
  idx <- match(iv, IVS_safe)
  m <- tryCatch(get(paste0("m_z_", idx)), error = function(e) NULL)
  if (is.null(m) || !inherits(m, "zeroinfl"))
    return(data.frame(Variable = label_term(iv), Base_Count = "\u2014",
                      Change = "\u2014", stringsAsFactors = FALSE))
  tryCatch({
    cnt_names <- names(stats::coef(m, "count"))[-1]
    cnt_names <- cnt_names[cnt_names %in% names(d_T3)]
    d_cf <- as.data.frame(lapply(d_T3[, cnt_names, drop = FALSE], function(x)
      if(is.numeric(x)) mean(x, na.rm = TRUE) else x[1]))[rep(1,1), , drop = FALSE]
    d_cf[[iv]] <- 0; c0 <- predict(m, newdata = d_cf, type = "response")
    d_cf[[iv]] <- if (iv == "troop_binary") 1 else sd(d_T3[[iv]], na.rm = TRUE)
    c1 <- predict(m, newdata = d_cf, type = "response")
    data.frame(Variable = label_term(iv),
               Base_Count = formatC(c0, format = "f", digits = 3),
               Change = formatC(c1 - c0, format = "f", digits = 3),
               stringsAsFactors = FALSE)
  }, error = function(e) data.frame(Variable = label_term(iv),
                                    Base_Count = "\u2014", Change = "\u2014", stringsAsFactors = FALSE))
})
ft_B20 <- make_simple_ft(do.call(rbind, ec_rows),
                         "Table B20. Expected MID Count Changes -- ZINB",
                         "Base at IV=0; change to 1 (binary) or 1SD (continuous). Full spec. DV: mid_sum_clean.")

message("  Section B complete (B1-B20).")

message("\n=== SECTION C: Model-Building Transparency ===")

message("  C1-C3: All 3 IVs...")
rhs_all3 <- paste(IVS_safe, collapse = " + ")
trio_C1 <- fit_trio(rhs_fn = function(iv) rhs_all3, data_ol = d0, data_lg = d0, data_z = d0)
fts_C1 <- make_trio_tables(trio_C1, IVS_safe, N_T1, N_T2, N_T3,
                           "C1", "Table C1. All 3 IVs -- OLogit",
                           "Table C2. All 3 IVs -- Logit (OR)",
                           "Table C3. All 3 IVs -- ZINB",
                           sec_hdr = SEC_NONE, notes_extra = "All 3 PK measures jointly; no controls.")

message("  C4-C6: + RC...")
trio_C4 <- fit_trio(rhs_fn = function(iv) paste(c(iv, RC_safe), collapse = " + "),
                    data_ol = d0, data_lg = d0, data_z = d0)
TERM_RC <- c(IVS_safe, RC_safe)
SEC_RC <- list(troop_binary = "PK variable", lag_mid_bin = "Reverse-causality controls")
fts_C4 <- make_trio_tables(trio_C4, TERM_RC, N_T1, N_T2, N_T3,
                           "C4", "Table C4. PK + RC -- OLogit",
                           "Table C5. PK + RC -- Logit (OR)",
                           "Table C6. PK + RC -- ZINB",
                           sec_hdr = SEC_RC, notes_extra = "PK + RC; no standard controls.")

message("  C7-C9: + Controls...")
trio_C7 <- fit_trio(rhs_fn = function(iv) paste(c(iv, RC_safe, CTRL_safe), collapse = " + "),
                    data_ol = d0, data_lg = d0, data_z = d0)
TERM_RCCTRL <- c(IVS_safe, RC_safe, CTRL_safe)
SEC_RCCTRL <- list(troop_binary = "PK variable", lag_mid_bin = "RC",
                   democracy_polity2 = "Standard controls")
fts_C7 <- make_trio_tables(trio_C7, TERM_RCCTRL, N_T1, N_T2, N_T3,
                           "C7", "Table C7. PK + RC + Controls -- OLogit",
                           "Table C8. PK + RC + Controls -- Logit (OR)",
                           "Table C9. PK + RC + Controls -- ZINB",
                           sec_hdr = SEC_RCCTRL, notes_extra = "PK + RC + controls; no region FE / time poly.")

message("  C13-C15: Sequential controls...")
SEQ_SPECS <- list(
  list(lab = "Bivariate",    fn = function() "troop_binary"),
  list(lab = "+ RC",         fn = function() paste(c("troop_binary", RC_safe), collapse = " + ")),
  list(lab = "+ Democracy",  fn = function() paste(c("troop_binary", RC_safe, "democracy_polity2"), collapse = " + ")),
  list(lab = "+ Capability", fn = function() paste(c("troop_binary", RC_safe, "democracy_polity2", "ln_capability"), collapse = " + ")),
  list(lab = "+ Trade",      fn = function() paste(c("troop_binary", RC_safe, CTRL_safe[1:min(3,length(CTRL_safe))]), collapse = " + ")),
  list(lab = "Full",         fn = function() rhs_full("troop_binary"))
)

make_seq_ft <- function(fit_fn, tidy_fn, title, scale = "coef") {
  rows <- lapply(SEQ_SPECS, function(s) {
    rhs <- s$fn()
    m <- tryCatch(fit_fn(rhs), error = function(e) NULL)
    if (is.null(m)) return(data.frame(Specification = s$lab, Troop_Binary = "\u2014",
                                      N = "\u2014", stringsAsFactors = FALSE))
    td <- tryCatch(tidy_fn(m), error = function(e) NULL)
    if (is.null(td) || !is.data.frame(td) || !"term" %in% names(td)) td <- empty_td
    r <- td[td$term == "troop_binary", , drop = FALSE]
    nn <- tryCatch(as.character(model_N(m)), error = function(e) "\u2014")
    if (NROW(r) == 0) return(data.frame(Specification = s$lab, Troop_Binary = "\u2014",
                                        N = nn, stringsAsFactors = FALSE))
    cell <- if (scale == "or") fmt_cell_or(r$estimate, r$conf.low, r$conf.high, r$p.value)
    else fmt_cell_ci(r$estimate, r$conf.low, r$conf.high, r$p.value, se = r$std.error)
    data.frame(Specification = s$lab, Troop_Binary = cell, N = nn, stringsAsFactors = FALSE)
  })
  make_simple_ft(do.call(rbind, rows), title, NOTE_CI)
}

ft_C10 <- make_seq_ft(
  function(rhs) MASS::polr(stats::as.formula(paste0("hostlev_f ~ ", rhs)), data = d0,
                           method = "logistic", Hess = TRUE, model = TRUE),
  function(m) tidy_polr_robust(m), "Table C10. Sequential -- OLogit (Troop Binary)")
ft_C11 <- make_seq_ft(
  function(rhs) stats::glm(stats::as.formula(paste0("initiator ~ ", rhs)), data = d0,
                           family = stats::binomial("logit")),
  function(m) tidy_glm_simple(m), "Table C11. Sequential -- Logit (OR, Troop Binary)", "or")
ft_C12 <- make_seq_ft(
  function(rhs) tryCatch(pscl::zeroinfl(stats::as.formula(paste0("mid_sum_clean ~ ", rhs, " | ", infl_rhs())),
                                        data = d0, dist = "negbin", link = "logit",
                                        control = pscl::zeroinfl.control(maxit = 3000)),
                         error = function(e) tryCatch(MASS::glm.nb(stats::as.formula(paste0("mid_sum_clean ~ ", rhs)), data = d0,
                                                                   control = stats::glm.control(maxit = 100)), error = function(e2) NULL)),
  function(m) tidy_count_model(m), "Table C12. Sequential -- ZINB (Troop Binary)")

message("  C13: Spec ladder...")
ladder_rows <- lapply(SEQ_SPECS, function(s) {
  rhs <- s$fn()
  m_ol <- tryCatch(MASS::polr(stats::as.formula(paste0("hostlev_f ~ ", rhs)), data = d0,
                              method = "logistic", Hess = TRUE, model = TRUE), error = function(e) NULL)
  td_ol <- if (!is.null(m_ol)) tryCatch(tidy_polr_robust(m_ol), error = function(e) empty_td) else empty_td
  r_ol <- td_ol[td_ol$term == "troop_binary", ]
  c_ol <- if (NROW(r_ol) > 0) fmt_cell_ci(r_ol$estimate, r_ol$conf.low, r_ol$conf.high, r_ol$p.value, se = r_ol$std.error) else "\u2014"
  m_lg <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs)), data = d0,
                              family = stats::binomial("logit")), error = function(e) NULL)
  c_lg <- get_or(m_lg, "troop_binary")
  m_z <- tryCatch(pscl::zeroinfl(stats::as.formula(paste0("mid_sum_clean ~ ", rhs, " | ", infl_rhs())),
                                 data = d0, dist = "negbin", link = "logit",
                                 control = pscl::zeroinfl.control(maxit = 3000)),
                  error = function(e) tryCatch(MASS::glm.nb(stats::as.formula(paste0("mid_sum_clean ~ ", rhs)), data = d0,
                                                            control = stats::glm.control(maxit = 100)), error = function(e2) NULL))
  c_z <- get_coef(m_z, "troop_binary")
  n_str <- tryCatch(as.character(model_N(m_lg)), error = function(e) "\u2014")
  data.frame(Specification = s$lab, OLogit = c_ol, Logit_OR = c_lg, ZINB = c_z,
             N = n_str, stringsAsFactors = FALSE)
})
ft_C13 <- make_simple_ft(do.call(rbind, ladder_rows),
                         "Table C13. Specification Ladder Summary (Troop Binary)",
                         paste(NOTE_CI, "Troop binary coefficient across progressively richer specs."))

message("  Section C complete (C1-C13).")

message("\n=== SECTION D: Panel Estimators ===")

CTRL_NT <- c(RC_safe, CTRL_safe, REGION_safe)

message("  D1: Country FE...")
fe_td <- lapply(IVS_safe, function(iv) {
  rhs <- paste(c(iv, CTRL_NT, "factor(cow)"), collapse = " + ")
  td <- tidy_panel(paste0("hostlev ~ ", rhs), d_T1)
  td[!grepl("^factor\\(cow\\)", td$term) & td$term != "(Intercept)", ]
})
ft_D1 <- build_table_df(fe_td[[1]], fe_td[[2]], fe_td[[3]], c(IVS_safe, RC_safe, CTRL_safe), "coef",
                        list(troop_binary = "PK", lag_mid_bin = "RC", democracy_polity2 = "Controls")) |>
  add_n_row(N_T1) |> make_flextable("Table D1. Country FE -- Hostility",
                                    c(NOTE_CI, "Country FE via indicator dummies. Region FE dropped (collinear)."))

message("  D2: RE...")
re_td <- lapply(IVS_safe, function(iv) {
  m <- tryCatch(lme4::lmer(stats::as.formula(paste0("hostlev ~ ", rhs_full(iv), " + (1|cow)")),
                           data = d_T1), error = function(e) NULL)
  if (!is.null(m)) {
    b <- lme4::fixef(m); V <- as.matrix(stats::vcov(m))
    se <- sqrt(diag(V)); crit <- stats::qnorm(0.975); nms <- names(b)
    td <- tibble::tibble(term = nms, estimate = as.numeric(b), std.error = as.numeric(se),
                         conf.low = as.numeric(b - crit*se), conf.high = as.numeric(b + crit*se),
                         p.value = as.numeric(2*stats::pnorm(abs(b/se), lower.tail = FALSE)))
    td[td$term != "(Intercept)", ]
  } else { message("  [WARN] RE failed for ", iv); tidy_panel(paste0("hostlev ~ ", rhs_full(iv)), d_T1) }
})
ft_D2 <- build_table_df(re_td[[1]], re_td[[2]], re_td[[3]], TERM_ORDER_MAIN_safe, "coef") |>
  add_n_row(N_T1) |> make_flextable("Table D2. Random Effects -- Hostility", c(NOTE_CI, "lme4::lmer."))

message("  D3: 2WFE...")
twfe_td <- lapply(IVS_safe, function(iv) {
  rhs <- paste(c(iv, CTRL_NT, "factor(cow)", "factor(time)"), collapse = " + ")
  td <- tidy_panel(paste0("hostlev ~ ", rhs), d_T1)
  td[!grepl("^factor\\(", td$term) & td$term != "(Intercept)", ]
})
ft_D3 <- build_table_df(twfe_td[[1]], twfe_td[[2]], twfe_td[[3]], c(IVS_safe, RC_safe, CTRL_safe), "coef",
                        list(troop_binary = "PK", lag_mid_bin = "RC", democracy_polity2 = "Controls")) |>
  add_n_row(N_T1) |> make_flextable("Table D3. Two-Way FE -- Hostility", c(NOTE_CI, "Country + year FE."))

message("  D4: FE Initiation...")
fe_init_td <- lapply(IVS_safe, function(iv) {
  rhs <- paste(c(iv, CTRL_NT, "factor(cow)"), collapse = " + ")
  td <- tidy_panel(paste0("initiator ~ ", rhs), d_T2)
  td[!grepl("^factor\\(", td$term) & td$term != "(Intercept)", ]
})
ft_D4 <- build_table_df(fe_init_td[[1]], fe_init_td[[2]], fe_init_td[[3]],
                        c(IVS_safe, RC_safe, CTRL_safe), "coef",
                        list(troop_binary = "PK", lag_mid_bin = "RC", democracy_polity2 = "Controls")) |>
  add_n_row(N_T2) |> make_flextable("Table D4. Country FE LPM -- Initiation",
                                    c(NOTE_CI, "LPM with country FE dummies."))

message("  D5: 2WFE Initiation...")
twfe_init_td <- lapply(IVS_safe, function(iv) {
  rhs <- paste(c(iv, CTRL_NT, "factor(cow)", "factor(time)"), collapse = " + ")
  td <- tidy_panel(paste0("initiator ~ ", rhs), d_T2)
  td[!grepl("^factor\\(", td$term) & td$term != "(Intercept)", ]
})
ft_D5 <- build_table_df(twfe_init_td[[1]], twfe_init_td[[2]], twfe_init_td[[3]],
                        c(IVS_safe, RC_safe, CTRL_safe), "coef") |>
  add_n_row(N_T2) |> make_flextable("Table D4. Two-Way FE LPM -- Initiation",
                                    c(NOTE_CI, "Country + year FE."))

message("  D6: Panel count...")
pois_fe_td <- lapply(IVS_safe, function(iv) {
  m <- tryCatch(stats::glm(stats::as.formula(paste0("mid_sum ~ ", iv, " + ",
                                                    paste(CTRL_NT, collapse = " + "), " + factor(cow)")),
                           data = d_T3, family = stats::poisson()), error = function(e) NULL)
  if (is.null(m)) return(empty_td)
  td <- tidy_glm_simple(m)
  td[!grepl("^factor\\(", td$term), ]
})
ft_D6 <- build_table_df(pois_fe_td[[1]], pois_fe_td[[2]], pois_fe_td[[3]],
                        c(IVS_safe, RC_safe, CTRL_safe), "coef") |>
  add_n_row(N_T3) |> make_flextable("Table D6. Poisson FE -- MID Count",
                                    c(NOTE_CI, "Poisson with country indicator dummies."))

message("  Section D complete (D1-D6).")

message("\n=== SECTION E: Alternative Estimators ===")

message("  E1: Ordered Probit...")
oprob_td <- lapply(IVS_safe, function(iv) {
  m <- tryCatch(MASS::polr(stats::as.formula(paste0("hostlev_f ~ ", rhs_full(iv))),
                           data = d_T1, method = "probit", Hess = TRUE, model = TRUE), error = function(e) NULL)
  if (!is.null(m)) tidy_polr_robust(m) else empty_td
})
ft_E1 <- build_table_df(oprob_td[[1]], oprob_td[[2]], oprob_td[[3]], TERM_ORDER_MAIN_safe, "coef") |>
  add_n_row(N_T1) |> make_flextable("Table E1. Ordered Probit -- Hostility", c(NOTE_CI))

message("  E2: Probit...")
prob_td <- lapply(IVS_safe, function(iv) {
  m <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_full(iv))),
                           data = d_T2, family = stats::binomial("probit")), error = function(e) NULL)
  if (!is.null(m)) tidy_glm_simple(m) else empty_td
})
ft_E2 <- build_table_df(prob_td[[1]], prob_td[[2]], prob_td[[3]], TERM_ORDER_MAIN_safe, "coef") |>
  add_n_row(N_T2) |> make_flextable("Table E2. Probit -- Initiation", c(NOTE_CI))

message("  E3: Poisson...")
pois_td <- lapply(IVS_safe, function(iv) {
  m <- tryCatch(stats::glm(stats::as.formula(paste0("mid_sum ~ ", rhs_full(iv))),
                           data = d_T3, family = stats::poisson()), error = function(e) NULL)
  if (!is.null(m)) tidy_glm_simple(m) else empty_td
})
ft_E3 <- build_table_df(pois_td[[1]], pois_td[[2]], pois_td[[3]], TERM_ORDER_MAIN_safe, "coef") |>
  add_n_row(N_T3) |> make_flextable("Table E3. Poisson -- MID Count", c(NOTE_CI))

message("  E4: NB...")
nb_td <- lapply(IVS_safe, function(iv) {
  m <- tryCatch(MASS::glm.nb(stats::as.formula(paste0("mid_sum ~ ", rhs_full(iv))),
                             data = d_T3, control = stats::glm.control(maxit = 100)), error = function(e) NULL)
  if (!is.null(m)) tidy_glm_simple(m) else empty_td
})
ft_E4 <- build_table_df(nb_td[[1]], nb_td[[2]], nb_td[[3]], TERM_ORDER_MAIN_safe, "coef") |>
  add_n_row(N_T3) |> make_flextable("Table E4. Negative Binomial -- MID Count", c(NOTE_CI, "Not zero-inflated."))

message("  E5: Hurdle NB...")
hurdle_td <- lapply(IVS_safe, function(iv) {
  m <- tryCatch(pscl::hurdle(
    stats::as.formula(paste0("mid_sum ~ ", rhs_full(iv), " | ", rhs_full(iv))),
    data = d_T3, dist = "negbin", zero.dist = "binomial", link = "logit"),
    error = function(e) NULL)
  if (is.null(m)) return(empty_td)
  b <- stats::coef(m, "count"); V <- stats::vcov(m)
  nms <- names(b)
  vcov_nms <- paste0("count_", nms)
  idx <- match(vcov_nms, colnames(V))
  if (any(is.na(idx))) {
    idx <- match(nms, colnames(V))
  }
  if (any(is.na(idx))) return(empty_td)
  se <- sqrt(pmax(diag(V[idx, idx, drop = FALSE]), 0)); crit <- stats::qnorm(0.975)
  td <- tibble::tibble(term = nms, estimate = as.numeric(b), std.error = as.numeric(se),
                       conf.low = as.numeric(b - crit*se), conf.high = as.numeric(b + crit*se),
                       p.value = as.numeric(2*stats::pnorm(abs(b/se), lower.tail = FALSE)))
  td[td$term != "(Intercept)", ]
})
ft_E5 <- build_table_df(hurdle_td[[1]], hurdle_td[[2]], hurdle_td[[3]], TERM_ORDER_MAIN_safe, "coef") |>
  add_n_row(N_T3) |> make_flextable("Table E5. Hurdle NB -- MID Count (Count Part)",
                                    c(NOTE_CI, "Two-part model: logit for zero vs nonzero; NB for positive counts."))

message("  E6: Model comparison...")
comp_rows <- lapply(IVS_safe, function(iv) {
  rhs <- rhs_full(iv)
  m_pois <- tryCatch(stats::glm(stats::as.formula(paste0("mid_sum ~ ", rhs)),
                                data = d_T3, family = stats::poisson()), error = function(e) NULL)
  m_nb <- tryCatch(MASS::glm.nb(stats::as.formula(paste0("mid_sum ~ ", rhs)),
                                data = d_T3, control = stats::glm.control(maxit = 100)), error = function(e) NULL)
  m_zinb <- tryCatch(pscl::zeroinfl(stats::as.formula(paste0("mid_sum ~ ", rhs, " | ", infl_rhs())),
                                    data = d_T3, dist = "negbin", link = "logit",
                                    control = pscl::zeroinfl.control(maxit = 3000)), error = function(e) NULL)
  get_aic <- function(m) if (!is.null(m)) tryCatch(formatC(AIC(m), format="f", digits=1), error=function(e) "\u2014") else "\u2014"
  get_ll  <- function(m) if (!is.null(m)) tryCatch(formatC(as.numeric(logLik(m)), format="f", digits=1), error=function(e) "\u2014") else "\u2014"
  obs_zero <- sum(d_T3$mid_sum == 0, na.rm = TRUE)
  pred_zero_pois <- if (!is.null(m_pois)) tryCatch(round(sum(dpois(0, fitted(m_pois)))), error = function(e) "\u2014") else "\u2014"
  pred_zero_nb   <- if (!is.null(m_nb)) tryCatch(round(sum(dnbinom(0, mu = fitted(m_nb), size = m_nb$theta))), error = function(e) "\u2014") else "\u2014"
  pred_zero_zinb <- if (!is.null(m_zinb)) tryCatch(round(sum(predict(m_zinb, type = "prob")[,1])), error = function(e) "\u2014") else "\u2014"
  data.frame(Model = c("Poisson", "NB", "ZINB"),
             AIC = c(get_aic(m_pois), get_aic(m_nb), get_aic(m_zinb)),
             LogLik = c(get_ll(m_pois), get_ll(m_nb), get_ll(m_zinb)),
             Pred_Zeros = c(as.character(pred_zero_pois), as.character(pred_zero_nb), as.character(pred_zero_zinb)),
             Obs_Zeros = rep(as.character(obs_zero), 3),
             stringsAsFactors = FALSE)
})
ft_E6 <- make_simple_ft(comp_rows[[1]],
                        "Table E6. ZIP vs ZINB vs NB Comparison (Troop Binary Spec)",
                        "AIC, log-likelihood, predicted vs observed zeros.")

message("  Section E complete (E1-E6).")

message("\n=== SECTION F: Endogeneity and Selection ===")

if (!"neverpk_dummy" %in% names(d0)) {
  d0 <- d0 |> dplyr::group_by(cow) |>
    dplyr::mutate(.mt = suppressWarnings(max(troop_binary, na.rm = TRUE)),
                  neverpk_dummy = dplyr::if_else(is.finite(.mt) & .mt == 0, 1L, 0L)) |>
    dplyr::ungroup() |> dplyr::select(-.mt)
  d_T1 <- dplyr::filter(d0, sample_T1 == 1L)
  d_T2 <- dplyr::filter(d0, sample_T2 == 1L)
  d_T3 <- dplyr::filter(d0, sample_T3 == 1L)
  d_T1$cow <- as.integer(d_T1$cow)
  d_T2$cow <- as.integer(d_T2$cow)
  d_T3$cow <- as.integer(d_T3$cow)
}
SEL_Z_safe <- intersect(c("neverpk_dummy", RHS_safe), names(d_T1))

add_cf_resid <- function(data, iv) {
  d_cf <- as.data.frame(data)
  m1 <- NULL
  if (iv == "troop_binary") {
    sel_rhs <- setdiff(SEL_Z_safe, "troop_binary")
    m1 <- tryCatch(stats::glm(stats::as.formula(paste0("troop_binary ~ ",
                                                       paste(sel_rhs, collapse = " + "))),
                              data = d_cf, family = stats::binomial("probit")), error = function(e) NULL)
    if (!is.null(m1)) {
      xb <- stats::predict(m1, type = "link")
      pp <- pmin(pmax(stats::pnorm(xb), 1e-6), 1 - 1e-6)
      d_cf$cf_resid <- ifelse(d_cf$troop_binary == 1,
                              stats::dnorm(xb)/pp, -stats::dnorm(xb)/(1-pp))
    } else { d_cf$cf_resid <- 0 }
  } else {
    m1 <- tryCatch(stats::lm(stats::as.formula(paste0(iv, " ~ ",
                                                      paste(SEL_Z_safe, collapse = " + "))), data = d_cf), error = function(e) NULL)
    d_cf$cf_resid <- if (!is.null(m1)) stats::residuals(m1) else 0
  }
  attr(d_cf, ".m1") <- m1
  d_cf
}

message("  F1: CF Initiation...")
cf_rows <- list()
for (iv in IVS_safe) {
  d_cf <- add_cf_resid(d_T2, iv)
  rhs_n <- paste(c(iv, RHS_safe), collapse = " + ")
  rhs_c <- paste(c(iv, RHS_safe, "cf_resid"), collapse = " + ")
  m_n <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_n)),
                             data = d_cf, family = stats::binomial("logit")), error = function(e) NULL)
  m_c <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_c)),
                             data = d_cf, family = stats::binomial("logit")), error = function(e) NULL)
  cf_rows[[length(cf_rows)+1]] <- data.frame(Variable = label_term(iv),
                                             Naive = get_or(m_n, iv), CF = get_or(m_c, iv), stringsAsFactors = FALSE)
}
ft_F1 <- make_simple_ft(do.call(rbind, cf_rows),
                        "Table F1. Naive vs CF -- Logit (OR, Initiation)",
                        paste(NOTE_CI, "Probit 1st stage (binary); OLS (continuous)."))

message("  F2: CF Binary Hostility...")
d_T1$hostile_binary <- as.integer(as.numeric(d_T1$hostlev) > 0)
cf2_rows <- list()
for (iv in IVS_safe) {
  d_cf <- add_cf_resid(d_T1, iv)
  rhs_n <- paste(c(iv, RHS_safe), collapse = " + ")
  rhs_c <- paste(c(iv, RHS_safe, "cf_resid"), collapse = " + ")
  m_n <- tryCatch(stats::glm(stats::as.formula(paste0("hostile_binary ~ ", rhs_n)),
                             data = d_cf, family = stats::binomial("logit")), error = function(e) NULL)
  m_c <- tryCatch(stats::glm(stats::as.formula(paste0("hostile_binary ~ ", rhs_c)),
                             data = d_cf, family = stats::binomial("logit")), error = function(e) NULL)
  cf2_rows[[length(cf2_rows)+1]] <- data.frame(Variable = label_term(iv),
                                               Naive = get_or(m_n, iv), CF = get_or(m_c, iv), stringsAsFactors = FALSE)
}
ft_F2 <- make_simple_ft(do.call(rbind, cf2_rows),
                        "Table F2. Naive vs CF -- Logit (OR, Binary Hostility)",
                        paste(NOTE_CI, "Probit 1st stage (binary); OLS (continuous)."))

message("  F3: CF Stage-1...")
s1_rows <- lapply(IVS_safe, function(iv) {
  d_cf <- add_cf_resid(d_T2, iv)
  m1 <- attr(d_cf, ".m1")
  if (is.null(m1)) return(data.frame(IV = label_term(iv), Stage1_Type = "\u2014",
                                     N = "\u2014", R2_or_PseudoR2 = "\u2014", stringsAsFactors = FALSE))
  s1_type <- if (iv == "troop_binary") "Probit" else "OLS"
  r2 <- tryCatch({
    if (s1_type == "OLS") formatC(summary(m1)$r.squared, format = "f", digits = 3)
    else formatC(1 - m1$deviance/m1$null.deviance, format = "f", digits = 3)
  }, error = function(e) "\u2014")
  data.frame(IV = label_term(iv), Stage1_Type = s1_type,
             N = tryCatch(as.character(nobs(m1)), error = function(e) "\u2014"),
             R2_or_PseudoR2 = r2, stringsAsFactors = FALSE)
})
ft_F3 <- make_simple_ft(do.call(rbind, s1_rows),
                        "Table F3. CF Stage-1 Results and Diagnostics",
                        "R2 for OLS; pseudo-R2 for probit.")

message("  F4: CF residual...")
resid_rows <- lapply(IVS_safe, function(iv) {
  d_cf <- add_cf_resid(d_T2, iv)
  rhs_c <- paste(c(iv, RHS_safe, "cf_resid"), collapse = " + ")
  m_c <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_c)),
                             data = d_cf, family = stats::binomial("logit")), error = function(e) NULL)
  if (is.null(m_c)) return(data.frame(IV = label_term(iv), CF_Resid_Coef = "\u2014",
                                      CF_Resid_SE = "\u2014", CF_Resid_p = "\u2014", stringsAsFactors = FALSE))
  td <- tidy_glm_simple(m_c)
  r <- td[td$term == "cf_resid", ]
  if (NROW(r) == 0) return(data.frame(IV = label_term(iv), CF_Resid_Coef = "\u2014",
                                      CF_Resid_SE = "\u2014", CF_Resid_p = "\u2014", stringsAsFactors = FALSE))
  data.frame(IV = label_term(iv),
             CF_Resid_Coef = formatC(r$estimate, format = "f", digits = 3),
             CF_Resid_SE = formatC(r$std.error, format = "f", digits = 3),
             CF_Resid_p = formatC(r$p.value, format = "f", digits = 3),
             stringsAsFactors = FALSE)
})
ft_F4 <- make_simple_ft(do.call(rbind, resid_rows),
                        "Table F4. CF Residual Term Results",
                        "Non-significant cf_resid suggests exogeneity cannot be rejected.")

message("  F5: Lagged IVs...")
d_T2 <- d_T2[order(d_T2$cow, d_T2$time), ]
for (iv in IVS_safe) {
  lag_name <- paste0(iv, "_lag")
  d_T2[[lag_name]] <- ave(as.numeric(d_T2[[iv]]), d_T2$cow,
                          FUN = function(x) c(NA, x[-length(x)]))
}

lag_rows <- lapply(IVS_safe, function(iv) {
  iv_lag <- paste0(iv, "_lag")
  rhs <- paste(c(iv_lag, RHS_safe), collapse = " + ")
  m <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs)),
                           data = d_T2, family = stats::binomial("logit")), error = function(e) NULL)
  data.frame(Variable = label_term(iv_lag), OR = get_or(m, iv_lag), stringsAsFactors = FALSE)
})
ft_F5 <- make_simple_ft(do.call(rbind, lag_rows),
                        "Table F5. Lagged PK (t-1) -- Logit (OR, Initiation)",
                        paste(NOTE_CI, "PK lagged one year. Addresses temporal ordering."))

message("  F6: IPW...")
match_v <- intersect(c(RC_safe, "democracy_polity2", "ln_capability", "ln_gdp_cap"), names(d_T2))
ps_m <- tryCatch(stats::glm(stats::as.formula(paste0("troop_binary ~ ",
                                                     paste(match_v, collapse = " + "))),
                            data = d_T2, family = stats::binomial("logit")), error = function(e) NULL)
ft_F6 <- NULL; ft_F7 <- NULL
if (!is.null(ps_m)) {
  d_T2$ps <- stats::predict(ps_m, type = "response")
  d_T2$ipw <- ifelse(d_T2$troop_binary == 1, 1/d_T2$ps, 1/(1 - d_T2$ps))
  d_T2$ipw <- pmin(d_T2$ipw, quantile(d_T2$ipw, 0.99, na.rm = TRUE))
  ipw_rows <- lapply(IVS_safe, function(iv) {
    m <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_full(iv))),
                             data = d_T2, family = stats::binomial("logit"), weights = ipw), error = function(e) NULL)
    data.frame(Variable = label_term(iv), OR = get_or(m, iv), stringsAsFactors = FALSE)
  })
  ft_F6 <- make_simple_ft(do.call(rbind, ipw_rows),
                          "Table F6. IPW Logit (OR) -- Initiation",
                          paste(NOTE_CI, "Inverse probability weighted. Trimmed at 99th pctile."))
  
  diag_df <- data.frame(
    Metric = c("PS Mean (Treated)", "PS Mean (Control)", "PS Min", "PS Max",
               "Weight Mean", "Weight Max (pre-trim)", "Weight Max (post-trim)", "N trimmed"),
    Value = c(
      formatC(mean(d_T2$ps[d_T2$troop_binary == 1], na.rm = TRUE), format = "f", digits = 3),
      formatC(mean(d_T2$ps[d_T2$troop_binary == 0], na.rm = TRUE), format = "f", digits = 3),
      formatC(min(d_T2$ps, na.rm = TRUE), format = "f", digits = 4),
      formatC(max(d_T2$ps, na.rm = TRUE), format = "f", digits = 4),
      formatC(mean(d_T2$ipw, na.rm = TRUE), format = "f", digits = 2),
      formatC(max(ifelse(d_T2$troop_binary == 1, 1/d_T2$ps, 1/(1-d_T2$ps)), na.rm = TRUE), format = "f", digits = 1),
      formatC(max(d_T2$ipw, na.rm = TRUE), format = "f", digits = 1),
      as.character(sum(d_T2$ipw == quantile(d_T2$ipw, 0.99, na.rm = TRUE), na.rm = TRUE))
    ), stringsAsFactors = FALSE)
  ft_F7 <- make_simple_ft(diag_df, "Table F7. IPW Diagnostics",
                          "Propensity score and weight distribution.")
}

message("  F8: Mundlak...")
mund_vars <- intersect(c(IVS_safe, RC_safe, CTRL_safe), names(d_T2))
for (v in mund_vars) {
  mn <- paste0(v, "_mean")
  if (!mn %in% names(d_T2))
    d_T2[[mn]] <- ave(as.numeric(d_T2[[v]]), d_T2$cow, FUN = function(x) mean(x, na.rm = TRUE))
}
mund_means <- paste0(mund_vars, "_mean")
mund_means <- mund_means[mund_means %in% names(d_T2)]

cre_rows <- lapply(IVS_safe, function(iv) {
  rhs <- paste(c(iv, RHS_safe, mund_means), collapse = " + ")
  m <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs)),
                           data = d_T2, family = stats::binomial("logit")), error = function(e) NULL)
  data.frame(Variable = label_term(iv), OR = get_or(m, iv), stringsAsFactors = FALSE)
})
ft_F8 <- make_simple_ft(do.call(rbind, cre_rows),
                        "Table F8. Mundlak/CRE Logit (OR) -- Initiation",
                        paste(NOTE_CI, "Includes country-means (Mundlak device)."))

message("  F9: Lead PK placebo...")
for (iv in IVS_safe) {
  lead_name <- paste0(iv, "_lead")
  d_T2[[lead_name]] <- ave(as.numeric(d_T2[[iv]]), d_T2$cow,
                           FUN = function(x) c(x[-1], NA))
}

lead_rows <- lapply(IVS_safe, function(iv) {
  iv_lead <- paste0(iv, "_lead")
  rhs <- paste(c(iv_lead, RHS_safe), collapse = " + ")
  m <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs)),
                           data = d_T2, family = stats::binomial("logit")), error = function(e) NULL)
  data.frame(Variable = label_term(iv_lead), OR = get_or(m, iv_lead), stringsAsFactors = FALSE)
})
ft_F9 <- make_simple_ft(do.call(rbind, lead_rows),
                        "Table F9. Lead PK (t+1) Placebo -- Logit (OR, Initiation)",
                        paste(NOTE_CI, "Future PK should NOT predict current outcomes if causal."))

message("  Section F complete (F1-F9).")

message("\n=== SECTION G: Robustness and Sensitivity ===")

message("  G1: MI...")
ft_G1 <- NULL
if (requireNamespace("mice", quietly = TRUE)) {
  imp_vars <- unique(intersect(c("hostlev","initiator","mid_sum", IVS_safe, RC_safe, CTRL_safe, "cow"), names(d0)))
  d_imp <- as.data.frame(lapply(d0[, imp_vars, drop = FALSE], as.numeric))
  mi_obj <- tryCatch(mice::mice(d_imp, m = 5, method = "pmm", maxit = 10,
                                printFlag = FALSE, seed = 1025), error = function(e) {
                                  message("  MI failed: ", conditionMessage(e)); NULL })
  if (!is.null(mi_obj)) {
    mi_rows <- lapply(IVS_safe, function(iv) {
      betas <- c()
      for (mm in 1:5) {
        d_m <- mice::complete(mi_obj, mm)
        for (rr in REGION_safe)
          if (!rr %in% names(d_m) && rr %in% names(d0)) d_m[[rr]] <- as.numeric(d0[[rr]])
        if (!"time" %in% names(d_m) && "time" %in% names(d0)) {
          d_m$time <- as.numeric(d0$time)
          d_m$time_squared <- d_m$time^2
          d_m$time_cubed   <- d_m$time^3
        }
        m <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_full(iv))),
                                 data = d_m, family = stats::binomial("logit")), error = function(e) NULL)
        if (!is.null(m) && iv %in% names(stats::coef(m))) betas <- c(betas, stats::coef(m)[iv])
      }
      orig <- switch(iv,
                     troop_binary = t2_1,
                     ln_max_total_peacekeeper = t2_2,
                     logged_pk5years = t2_3,
                     empty_td)
      r_o <- orig[orig$term == iv, ]
      cell_o <- if (NROW(r_o) > 0) fmt_cell_or(r_o$estimate, r_o$conf.low, r_o$conf.high, r_o$p.value) else "\u2014"
      if (length(betas) >= 2) {
        est <- mean(betas); se_mi <- sqrt(var(betas) * (1 + 1/length(betas))); crit <- stats::qnorm(0.975)
        cell_m <- fmt_cell_or(est, est - crit*se_mi, est + crit*se_mi,
                              2*stats::pnorm(abs(est/se_mi), lower.tail = FALSE))
      } else cell_m <- "\u2014"
      data.frame(Variable = label_term(iv), Original = cell_o, MI = cell_m, stringsAsFactors = FALSE)
    })
    ft_G1 <- make_simple_ft(do.call(rbind, mi_rows), "Table G1. Listwise vs MI -- Logit (OR)",
                            paste(NOTE_CI, "5 imputations (PMM); Rubin's rules."))
  }
}

message("  G2-G8: Alt DVs...")
d_T2$hostile_binary <- as.integer(as.numeric(d_T2$hostlev) > 0)
d_T2$hostlev_2plus  <- as.integer(as.numeric(d_T2$hostlev) >= 2)
d_T2$hostlev_3plus  <- as.integer(as.numeric(d_T2$hostlev) >= 3)
d_T2$hostlev_4plus  <- as.integer(as.numeric(d_T2$hostlev) >= 4)
d_T2$hostlev_5      <- as.integer(as.numeric(d_T2$hostlev) >= 5)

alt_info <- list(
  list(dv = "hostile_binary", lab = "Any Hostility (>=1)", tbl = "G2", link = "logit"),
  list(dv = "hostlev_2plus",  lab = "Threat+ (>=2)",       tbl = "G3", link = "logit"),
  list(dv = "hostlev_3plus",  lab = "Display+ (>=3)",      tbl = "G4", link = "logit"),
  list(dv = "hostlev_4plus",  lab = "Force/War (>=4)",     tbl = "G5", link = "logit"),
  list(dv = "hostlev_3plus",  lab = "Display+ (>=3)",      tbl = "G6", link = "probit"),
  list(dv = "hostlev_4plus",  lab = "Force/War (>=4)",     tbl = "G7", link = "probit"),
  list(dv = "hostlev_5",      lab = "War Only (=5)",       tbl = "G8", link = "logit")
)

ft_G_alt <- lapply(alt_info, function(info) {
  td <- lapply(IVS_safe, function(iv) {
    fam <- if (info$link == "probit") stats::binomial("probit") else stats::binomial("logit")
    m <- tryCatch(stats::glm(stats::as.formula(paste0(info$dv, " ~ ", rhs_full(iv))),
                             data = d_T2, family = fam), error = function(e) NULL)
    if (!is.null(m)) tidy_glm_simple(m) else empty_td
  })
  scale <- if (info$link == "logit") "or" else "coef"
  build_table_df(td[[1]], td[[2]], td[[3]], TERM_ORDER_MAIN_safe, scale) |>
    add_n_row(rep(nrow(d_T2), 3)) |>
    make_flextable(paste0("Table ", info$tbl, ". ",
                          ifelse(info$link == "logit", "Logit (OR)", "Probit"), " -- ", info$lab),
                   c(NOTE_CI, ifelse(info$link == "logit", "OR.", "")))
})

message("  G9-G10: Time splits...")
d_early <- dplyr::filter(d_T2, time <= 2004)
d_late  <- dplyr::filter(d_T2, time >= 2005)
mk_split <- function(d, lab) {
  td <- lapply(IVS_safe, function(iv) {
    m <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_full(iv))),
                             data = d, family = stats::binomial("logit")), error = function(e) NULL)
    if (!is.null(m)) tidy_glm_simple(m) else empty_td
  })
  build_table_df(td[[1]], td[[2]], td[[3]], TERM_ORDER_MAIN_safe, "or") |>
    add_n_row(rep(nrow(d), 3)) |> make_flextable(lab, c(NOTE_CI, "OR."))
}
ft_G9  <- mk_split(d_early, "Table G9. Time Split -- 1990-2004 (Logit OR)")
ft_G10 <- mk_split(d_late,  "Table G10. Time Split -- 2005-2014 (Logit OR)")

message("  G11: No P5...")
d_nmp <- dplyr::filter(d_T2, !cow %in% c(2L, 200L, 220L, 365L, 710L))
ft_G11 <- mk_split(d_nmp, "Table G8. Excluding P5 -- Logit (OR)")

message("  G12: Robust SE...")
rob_rows <- lapply(IVS_safe, function(iv) {
  m <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_full(iv))),
                           data = d_T2, family = stats::binomial("logit")), error = function(e) NULL)
  if (is.null(m)) return(data.frame(Variable = label_term(iv), OR = "\u2014", stringsAsFactors = FALSE))
  V <- tryCatch(sandwich::vcovHC(m, type = "HC1"), error = function(e) stats::vcov(m))
  td <- tidy_from_vcov(m, V)
  r <- td[td$term == iv, ]
  cell <- if (NROW(r) > 0) fmt_cell_or(r$estimate, r$conf.low, r$conf.high, r$p.value) else "\u2014"
  data.frame(Variable = label_term(iv), OR = cell, stringsAsFactors = FALSE)
})
ft_G12 <- make_simple_ft(do.call(rbind, rob_rows),
                         "Table G9. HC1 Robust SE -- Logit (OR, Initiation)",
                         paste(NOTE_CI, "Heteroskedasticity-robust SE."))

message("  Section G complete (G1-G12).")

message("\n=== SECTION H: Matching and Balancing ===")

match_covs <- intersect(c(RC_safe, CTRL_safe), names(d_T2))
has_matchit <- requireNamespace("MatchIt", quietly = TRUE)

ft_H1 <- NULL; ft_H2 <- NULL; ft_H3 <- NULL; ft_H4 <- NULL; ft_H5 <- NULL
d_matched <- NULL

if (has_matchit && length(match_covs) >= 2) {
  
  message("  H1-H2: PSM...")
  psm_fmla <- stats::as.formula(paste0("troop_binary ~ ", paste(match_covs, collapse = " + ")))
  psm_obj <- tryCatch(MatchIt::matchit(psm_fmla, data = d_T2, method = "nearest",
                                       distance = "logit", caliper = 0.2, ratio = 1), error = function(e) NULL)
  
  if (!is.null(psm_obj)) {
    bal <- tryCatch({
      s <- summary(psm_obj)
      if (!is.null(s$sum.matched)) {
        bd <- as.data.frame(s$sum.matched)
        bd$Variable <- rownames(bd)
        cnames <- colnames(bd)
        tr_col <- grep("Means Treated|Mean Treated", cnames, value = TRUE)[1]
        ct_col <- grep("Means Control|Mean Control", cnames, value = TRUE)[1]
        smd_col <- grep("Std. Mean Diff|Mean Diff", cnames, value = TRUE)[1]
        if (!is.na(tr_col) && !is.na(ct_col) && !is.na(smd_col)) {
          bd <- bd[, c("Variable", tr_col, ct_col, smd_col)]
          names(bd) <- c("Variable", "Treated", "Control", "SMD")
          bd$Treated <- formatC(as.numeric(bd$Treated), format = "f", digits = 3)
          bd$Control <- formatC(as.numeric(bd$Control), format = "f", digits = 3)
          bd$SMD     <- formatC(as.numeric(bd$SMD),     format = "f", digits = 3)
          bd
        } else NULL
      } else NULL
    }, error = function(e) NULL)
    if (!is.null(bal))
      ft_H1 <- make_simple_ft(bal, "Table H1. PSM Balance Diagnostics (Post-Matching)",
                              "Nearest-neighbor, caliper=0.2. SMD < 0.1 indicates good balance.")
    
    d_matched <- tryCatch(MatchIt::match.data(psm_obj), error = function(e) NULL)
    if (!is.null(d_matched)) {
      psm_est_rows <- lapply(IVS_safe, function(iv) {
        m <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", iv, " + ",
                                                          paste(match_covs, collapse = " + "))),
                                 data = d_matched, family = stats::binomial("logit"),
                                 weights = weights), error = function(e) NULL)
        data.frame(Variable = label_term(iv), OR = get_or(m, iv), stringsAsFactors = FALSE)
      })
      ft_H2 <- make_simple_ft(do.call(rbind, psm_est_rows),
                              "Table H2. PSM Matched Estimates -- Logit (OR, Initiation)",
                              paste(NOTE_CI, "Nearest-neighbor matching, caliper=0.2."))
    }
  }
  
  message("  H3-H4: CEM...")
  cem_obj <- tryCatch(MatchIt::matchit(psm_fmla, data = d_T2, method = "cem"),
                      error = function(e) NULL)
  
  if (!is.null(cem_obj)) {
    cem_bal <- tryCatch({
      s <- summary(cem_obj)
      if (!is.null(s$sum.matched)) {
        bd <- as.data.frame(s$sum.matched)
        bd$Variable <- rownames(bd)
        cnames <- colnames(bd)
        tr_col <- grep("Means Treated|Mean Treated", cnames, value = TRUE)[1]
        ct_col <- grep("Means Control|Mean Control", cnames, value = TRUE)[1]
        smd_col <- grep("Std. Mean Diff|Mean Diff", cnames, value = TRUE)[1]
        if (!is.na(tr_col) && !is.na(ct_col) && !is.na(smd_col)) {
          bd <- bd[, c("Variable", tr_col, ct_col, smd_col)]
          names(bd) <- c("Variable", "Treated", "Control", "SMD")
          bd$Treated <- formatC(as.numeric(bd$Treated), format = "f", digits = 3)
          bd$Control <- formatC(as.numeric(bd$Control), format = "f", digits = 3)
          bd$SMD     <- formatC(as.numeric(bd$SMD),     format = "f", digits = 3)
          bd
        } else NULL
      } else NULL
    }, error = function(e) NULL)
    if (!is.null(cem_bal))
      ft_H3 <- make_simple_ft(cem_bal, "Table H3. CEM Balance Diagnostics", "Coarsened exact matching.")
    
    d_cem <- tryCatch(MatchIt::match.data(cem_obj), error = function(e) NULL)
    if (!is.null(d_cem)) {
      cem_est_rows <- lapply(IVS_safe, function(iv) {
        m <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", iv, " + ",
                                                          paste(match_covs, collapse = " + "))),
                                 data = d_cem, family = stats::binomial("logit"),
                                 weights = weights), error = function(e) NULL)
        data.frame(Variable = label_term(iv), OR = get_or(m, iv), stringsAsFactors = FALSE)
      })
      ft_H4 <- make_simple_ft(do.call(rbind, cem_est_rows),
                              "Table H4. CEM Matched Estimates -- Logit (OR, Initiation)",
                              paste(NOTE_CI, "Coarsened exact matching."))
    }
  }
  
  message("  H5: Comparison summary...")
  comp5_rows <- lapply(IVS_safe, function(iv) {
    m_raw <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_full(iv))),
                                 data = d_T2, family = stats::binomial("logit")), error = function(e) NULL)
    c_raw <- get_or(m_raw, iv)
    c_ipw <- if ("ipw" %in% names(d_T2)) {
      m_ipw <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_full(iv))),
                                   data = d_T2, family = stats::binomial("logit"), weights = ipw), error = function(e) NULL)
      get_or(m_ipw, iv)
    } else "\u2014"
    c_psm <- if (!is.null(d_matched)) {
      m_psm <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", iv, " + ",
                                                            paste(match_covs, collapse = " + "))),
                                   data = d_matched, family = stats::binomial("logit"), weights = weights),
                        error = function(e) NULL)
      get_or(m_psm, iv)
    } else "\u2014"
    data.frame(Variable = label_term(iv), Unmatched = c_raw, IPW = c_ipw, PSM = c_psm,
               stringsAsFactors = FALSE)
  })
  ft_H5 <- make_simple_ft(do.call(rbind, comp5_rows),
                          "Table H4. IPW vs Matching Summary -- Logit (OR, Initiation)",
                          paste(NOTE_CI, "Comparing treatment effect estimates across methods."))
  
} else {
  message("  [SKIP] MatchIt not available or insufficient covariates.")
}

message("  Section H complete.")

message("\n=== SECTION I: Dynamics ===")

ft_I1 <- NULL; ft_I2 <- NULL; ft_I3 <- NULL; ft_I4 <- NULL

message("  I1: Lagged DV...")
d_T2$initiator_lag <- ave(as.numeric(d_T2$initiator), d_T2$cow,
                          FUN = function(x) c(NA, x[-length(x)]))
if ("hostlev" %in% names(d_T2))
  d_T2$hostlev_lag <- ave(as.numeric(d_T2$hostlev), d_T2$cow,
                          FUN = function(x) c(NA, x[-length(x)]))

ldv_rows <- lapply(IVS_safe, function(iv) {
  rhs_ldv <- paste(c(iv, "initiator_lag", RHS_safe), collapse = " + ")
  m <- tryCatch(stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_ldv)),
                           data = d_T2, family = stats::binomial("logit")), error = function(e) NULL)
  data.frame(Variable = label_term(iv), OR = get_or(m, iv), stringsAsFactors = FALSE)
})
ft_I1 <- make_simple_ft(do.call(rbind, ldv_rows),
                        "Table I1. Lagged DV Model -- Logit (OR, Initiation)",
                        paste(NOTE_CI, "Includes lagged initiator. Addresses serial dependence."))

message("  I2: Distributed lags...")
d_T2$troop_binary_lag2 <- ave(as.numeric(d_T2$troop_binary), d_T2$cow,
                              FUN = function(x) c(NA, NA, x[1:(length(x)-2)]))
d_T2$troop_binary_lag3 <- ave(as.numeric(d_T2$troop_binary), d_T2$cow,
                              FUN = function(x) if(length(x) <= 3) rep(NA, length(x))
                              else c(rep(NA, 3), x[1:(length(x)-3)]))

dl_m <- tryCatch(stats::glm(stats::as.formula(paste0(
  "initiator ~ troop_binary + troop_binary_lag + troop_binary_lag2 + troop_binary_lag3 + ",
  paste(RHS_safe, collapse = " + "))),
  data = d_T2, family = stats::binomial("logit")), error = function(e) NULL)
if (!is.null(dl_m)) {
  td_dl <- tidy_glm_simple(dl_m)
  dl_terms <- c("troop_binary", "troop_binary_lag", "troop_binary_lag2", "troop_binary_lag3")
  dl_df <- do.call(rbind, lapply(dl_terms, function(tm) {
    r <- td_dl[td_dl$term == tm, ]
    if (NROW(r) > 0) data.frame(Lag = switch(tm,
                                             troop_binary = "t (contemporaneous)", troop_binary_lag = "t-1",
                                             troop_binary_lag2 = "t-2", troop_binary_lag3 = "t-3"),
                                OR = fmt_cell_or(r$estimate, r$conf.low, r$conf.high, r$p.value), stringsAsFactors = FALSE)
    else data.frame(Lag = tm, OR = "\u2014", stringsAsFactors = FALSE)
  }))
  ft_I2 <- make_simple_ft(dl_df, "Table I2. Distributed Lags of PK Exposure (Troop Binary)",
                          paste(NOTE_CI, "All lags included jointly."))
}

message("  I3: Cox model...")
has_survival <- requireNamespace("survival", quietly = TRUE)
if (has_survival && "time" %in% names(d_T2)) {
  d_T2$dur_time <- as.numeric(d_T2$time) - min(as.numeric(d_T2$time), na.rm = TRUE) + 1
  d_T2$event <- as.numeric(d_T2$initiator)
  cox_rows <- lapply(IVS_safe, function(iv) {
    rhs_cox <- paste(c(iv, RC_safe, CTRL_safe), collapse = " + ")
    m <- tryCatch(survival::coxph(stats::as.formula(paste0(
      "survival::Surv(dur_time, event) ~ ", rhs_cox)), data = d_T2),
      error = function(e) NULL)
    if (is.null(m)) return(data.frame(Variable = label_term(iv), HR = "\u2014", stringsAsFactors = FALSE))
    td <- tryCatch({
      b <- stats::coef(m); V <- stats::vcov(m)
      se <- sqrt(diag(V)); crit <- stats::qnorm(0.975)
      tibble::tibble(term = names(b), estimate = as.numeric(b), std.error = as.numeric(se),
                     conf.low = as.numeric(b - crit*se), conf.high = as.numeric(b + crit*se),
                     p.value = as.numeric(2*stats::pnorm(abs(b/se), lower.tail = FALSE)))
    }, error = function(e) empty_td)
    r <- td[td$term == iv, ]
    if (NROW(r) > 0) {
      hr <- exp(r$estimate); hr_lo <- exp(r$conf.low); hr_hi <- exp(r$conf.high)
      cell <- paste0(formatC(hr, format = "f", digits = 3), " [",
                     formatC(hr_lo, format = "f", digits = 3), ", ",
                     formatC(hr_hi, format = "f", digits = 3), "]",
                     ifelse(r$p.value < 0.001, "***", ifelse(r$p.value < 0.01, "**",
                                                             ifelse(r$p.value < 0.05, "*", ""))))
      data.frame(Variable = label_term(iv), HR = cell, stringsAsFactors = FALSE)
    } else data.frame(Variable = label_term(iv), HR = "\u2014", stringsAsFactors = FALSE)
  })
  ft_I3 <- make_simple_ft(do.call(rbind, cox_rows),
                          "Table I2. Cox Proportional Hazards -- Time to MID Initiation",
                          paste(NOTE_CI, "Hazard ratios reported."))
}

message("  I4: Event-time...")
ft_I4 <- tryCatch({
  pk_starts <- aggregate(time ~ cow, data = d_T2[d_T2$troop_binary == 1, ],
                         FUN = min, na.rm = TRUE)
  names(pk_starts)[2] <- "first_pk_year"
  d_evt <- merge(d_T2, pk_starts, by = "cow", all.x = TRUE)
  d_evt$rel_time <- as.numeric(d_evt$time) - as.numeric(d_evt$first_pk_year)
  d_evt <- d_evt[!is.na(d_evt$rel_time) & abs(d_evt$rel_time) <= 5, ]
  if (nrow(d_evt) < 20) stop("Too few obs")
  evt_df <- do.call(rbind, lapply(sort(unique(d_evt$rel_time)), function(rt) {
    d_rt <- d_evt[d_evt$rel_time == rt, ]
    data.frame(Rel_Time = rt, N = nrow(d_rt),
               Mean_Hostility = formatC(mean(as.numeric(d_rt$hostlev), na.rm = TRUE), format = "f", digits = 3),
               Mean_Initiation = formatC(mean(as.numeric(d_rt$initiator), na.rm = TRUE), format = "f", digits = 3),
               stringsAsFactors = FALSE)
  }))
  make_simple_ft(evt_df, "Table I3. Event-Time Around First PK Deployment",
                 "Years relative to first PK troop presence. Window: -5 to +5.")
}, error = function(e) { message("  [SKIP] Event-time: ", conditionMessage(e)); NULL })

message("  Section I complete.")

message("\n=== SECTION J: Placebo Tests ===")

ft_J1 <- NULL; ft_J2 <- NULL; ft_J3 <- NULL; ft_J4 <- NULL

d0 <- d0[order(d0$cow, d0$time), ]
for (iv in IVS_safe) {
  lead_name <- paste0(iv, "_lead")
  d0[[lead_name]] <- ave(as.numeric(d0[[iv]]), d0$cow,
                         FUN = function(x) c(x[-1], NA))
}

d_T1_lead <- d0[d0$sample_T1 == 1L & !is.na(d0$troop_binary_lead), ]
d_T2_lead <- d0[d0$sample_T2 == 1L & !is.na(d0$troop_binary_lead), ]
d_T3_lead <- d0[d0$sample_T3 == 1L & !is.na(d0$troop_binary_lead), ]

d_T1_lead$hostlev_f <- factor(d_T1_lead$hostlev, levels = 0:5,
                              labels = HOSTLEV_LABS, ordered = TRUE)
d_T1_lead$initiator <- as.integer(d_T1_lead$initiator)
d_T2_lead$initiator <- as.integer(d_T2_lead$initiator)

d_T3_lead$mid_sum_clean <- ifelse(
  as.numeric(d_T3_lead$lag_mid_bin) == 1 | as.numeric(d_T3_lead$persistent_dispute) == 1,
  0L, as.integer(d_T3_lead$mid_sum))

rhs_lead <- function(iv) {
  lead_name <- paste0(iv, "_lead")
  paste(c(lead_name, RHS_safe), collapse = " + ")
}

LEAD_IVS <- paste0(IVS_safe, "_lead")
TERM_ORDER_LEAD <- c(LEAD_IVS, intersect(c(RC_safe, CTRL_safe), names(d0)))

LEAD_LABELS <- setNames(
  paste0(label_term(IVS_safe), " (t+1 Lead)"),
  LEAD_IVS)

label_term_lead <- function(x) {
  out <- LEAD_LABELS[x]
  ifelse(is.na(out), label_term(x), out)
}

build_lead_table <- function(t1, t2, t3, term_order, scale = "coef") {
  ensure_td <- function(td) {
    if (is.null(td) || !is.data.frame(td) || !"term" %in% names(td)) return(empty_td)
    td
  }
  t1 <- ensure_td(t1); t2 <- ensure_td(t2); t3 <- ensure_td(t3)
  tlist <- list(t1, t2, t3)
  all_terms <- unique(unlist(lapply(tlist, function(x) x$term)))
  present <- term_order[term_order %in% all_terms]
  
  if (length(present) == 0) {
    return(data.frame(Variable = "(models did not converge)",
                      M1 = "\u2014", M2 = "\u2014", M3 = "\u2014",
                      .is_header = FALSE, stringsAsFactors = FALSE))
  }
  
  sec_hdrs <- list()
  if (length(LEAD_IVS) > 0) sec_hdrs[[LEAD_IVS[1]]] <- "Placebo treatment (future peacekeeping: t+1)"
  for (rc in RC_safe) sec_hdrs[[rc]] <- "Reverse-causality / security environment"
  for (ct in CTRL_safe) sec_hdrs[[ct]] <- "Standard controls"
  
  rows <- list()
  prev_sec <- ""
  for (trm in present) {
    hdr <- sec_hdrs[[trm]]
    if (!is.null(hdr) && hdr != prev_sec) {
      rows[[length(rows) + 1]] <- data.frame(Variable = hdr, M1 = "", M2 = "", M3 = "",
                                             .is_header = TRUE, stringsAsFactors = FALSE)
      prev_sec <- hdr
    }
    cells <- lapply(tlist, function(td) {
      r <- td[td$term == trm, , drop = FALSE]
      if (NROW(r) == 0) return("\u2014")
      if (scale == "or") fmt_cell_or(r$estimate[1], r$conf.low[1], r$conf.high[1], r$p.value[1])
      else fmt_cell_ci(r$estimate[1], r$conf.low[1], r$conf.high[1], r$p.value[1], se = r$std.error[1])
    })
    lbl <- if (trm %in% LEAD_IVS) label_term_lead(trm) else label_term(trm)
    rows[[length(rows) + 1]] <- data.frame(Variable = lbl, M1 = cells[[1]], M2 = cells[[2]],
                                           M3 = cells[[3]], .is_header = FALSE, stringsAsFactors = FALSE)
  }
  do.call(rbind, rows)
}

NOTE_PLACEBO <- c(
  NOTE_CI,
  "Placebo design: replaces contemporaneous PK measures with their one-year lead (t+1).",
  "Expectation: coefficients near zero (no anticipatory association if reverse causality is limited).",
  "All models include region FE and cubic time polynomial (suppressed)."
)

message("  J1: Placebo OLogit...")
plac_ol_td <- lapply(IVS_safe, function(iv) {
  tryCatch({
    m <- MASS::polr(stats::as.formula(paste0("hostlev_f ~ ", rhs_lead(iv))),
                    data = d_T1_lead, method = "logistic", Hess = TRUE, model = TRUE)
    tidy_polr_robust(m)
  }, error = function(e) { message("  [WARN] Placebo OLogit: ", conditionMessage(e)); empty_td })
})
plac_N_ol <- sapply(IVS_safe, function(iv) {
  tryCatch(nrow(stats::model.frame(MASS::polr(
    stats::as.formula(paste0("hostlev_f ~ ", rhs_lead(iv))),
    data = d_T1_lead, method = "logistic", Hess = FALSE, model = TRUE))),
    error = function(e) nrow(d_T1_lead))
})
ft_J1 <- build_lead_table(plac_ol_td[[1]], plac_ol_td[[2]], plac_ol_td[[3]],
                          TERM_ORDER_LEAD, "coef") |>
  add_n_row(plac_N_ol) |>
  make_flextable("Table J1. Placebo Test: Future PK (t+1) Predicting Current Hostility (Ordered Logit)",
                 NOTE_PLACEBO)

message("  J2: Placebo Logit...")
plac_lg_td <- lapply(IVS_safe, function(iv) {
  tryCatch({
    m <- stats::glm(stats::as.formula(paste0("initiator ~ ", rhs_lead(iv))),
                    data = d_T2_lead, family = stats::binomial("logit"))
    tidy_glm_simple(m)
  }, error = function(e) { message("  [WARN] Placebo Logit: ", conditionMessage(e)); empty_td })
})
plac_N_lg <- sapply(IVS_safe, function(iv) {
  tryCatch(nrow(stats::model.frame(stats::glm(
    stats::as.formula(paste0("initiator ~ ", rhs_lead(iv))),
    data = d_T2_lead, family = stats::binomial("logit")))),
    error = function(e) nrow(d_T2_lead))
})
ft_J2 <- build_lead_table(plac_lg_td[[1]], plac_lg_td[[2]], plac_lg_td[[3]],
                          TERM_ORDER_LEAD, "or") |>
  add_n_row(plac_N_lg) |>
  make_flextable("Table J2. Placebo Test: Future PK (t+1) Predicting Current Initiation (Logit, OR)",
                 c(NOTE_PLACEBO, "Reported estimates are odds ratios (exp(\u03b2))."))

message("  J3: Placebo ZINB...")
plac_z_td <- lapply(IVS_safe, function(iv) {
  lead_name <- paste0(iv, "_lead")
  rhs_z <- paste(c(lead_name, RHS_safe), collapse = " + ")
  tryCatch({
    m <- pscl::zeroinfl(
      stats::as.formula(paste0("mid_sum_clean ~ ", rhs_z, " | ",
                               paste(INFL_NEW, collapse = " + "))),
      data = d_T3_lead, dist = "negbin", link = "logit",
      control = pscl::zeroinfl.control(maxit = 3000))
    tidy_count_model(m)
  }, error = function(e) {
    tryCatch({
      m <- MASS::glm.nb(stats::as.formula(paste0("mid_sum_clean ~ ", rhs_z)),
                        data = d_T3_lead, control = stats::glm.control(maxit = 100))
      tidy_glm_simple(m)
    }, error = function(e2) { message("  [WARN] Placebo ZINB: ", conditionMessage(e2)); empty_td })
  })
})
plac_N_z <- rep(nrow(d_T3_lead), 3)
ft_J3 <- build_lead_table(plac_z_td[[1]], plac_z_td[[2]], plac_z_td[[3]],
                          TERM_ORDER_LEAD, "coef") |>
  add_n_row(plac_N_z) |>
  make_flextable("Table J3. Placebo Test: Future PK (t+1) Predicting Current MID Frequency (ZINB Count)",
                 c(NOTE_PLACEBO, "Count-model coefficients. DV: mid_sum_clean. Inflate: cum_peace_10pp + high_igo + no_contig."))

message("  J4: Placebo summary...")
plac_summary <- lapply(IVS_safe, function(iv) {
  lead_iv <- paste0(iv, "_lead")
  c_ol <- tryCatch({
    td <- plac_ol_td[[match(iv, IVS_safe)]]
    r <- td[td$term == lead_iv, ]
    if (NROW(r)) fmt_cell_ci(r$estimate, r$conf.low, r$conf.high, r$p.value, se = r$std.error) else "\u2014"
  }, error = function(e) "\u2014")
  c_lg <- tryCatch({
    td <- plac_lg_td[[match(iv, IVS_safe)]]
    r <- td[td$term == lead_iv, ]
    if (NROW(r)) fmt_cell_or(r$estimate, r$conf.low, r$conf.high, r$p.value) else "\u2014"
  }, error = function(e) "\u2014")
  c_z <- tryCatch({
    td <- plac_z_td[[match(iv, IVS_safe)]]
    r <- td[td$term == lead_iv, ]
    if (NROW(r)) fmt_cell_ci(r$estimate, r$conf.low, r$conf.high, r$p.value, se = r$std.error) else "\u2014"
  }, error = function(e) "\u2014")
  data.frame(Variable = label_term_lead(lead_iv),
             OLogit_Coef = c_ol, Logit_OR = c_lg, ZINB_Count = c_z,
             stringsAsFactors = FALSE)
})
ft_J4 <- make_simple_ft(do.call(rbind, plac_summary),
                        "Table J4. Placebo Summary: Lead PK (t+1) Effects Across All Three DVs",
                        c(NOTE_CI, "Expectation: null effects (no anticipatory association)."))

message("  Section J complete (J1-J4).")

message("\n=== SECTION K: Appendix Figures ===")

has_ggplot <- requireNamespace("ggplot2", quietly = TRUE)
if (!has_ggplot) message("  [SKIP] ggplot2 not available; all figures skipped.")

fig_paths <- character(0)

if (has_ggplot) {
  
  message("  Figure A1: Coefficient forest plot...")
  tryCatch({
    forest_rows <- list()
    for (iv in IVS_safe) {
      idx <- match(iv, IVS_safe)
      ivlab <- label_term(iv)
      td_ol <- switch(idx, t1_1, t1_2, t1_3)
      r <- td_ol[td_ol$term == iv, ]
      if (NROW(r)) forest_rows[[length(forest_rows)+1]] <- data.frame(
        iv = ivlab, outcome = "Hostility (OLogit)", estimate = r$estimate,
        lo = r$conf.low, hi = r$conf.high, stringsAsFactors = FALSE)
      td_lg <- switch(idx, t2_1, t2_2, t2_3)
      r <- td_lg[td_lg$term == iv, ]
      if (NROW(r)) forest_rows[[length(forest_rows)+1]] <- data.frame(
        iv = ivlab, outcome = "Initiation (Logit)", estimate = r$estimate,
        lo = r$conf.low, hi = r$conf.high, stringsAsFactors = FALSE)
      td_z <- switch(idx, t3_1, t3_2, t3_3)
      r <- td_z[td_z$term == iv, ]
      if (NROW(r)) forest_rows[[length(forest_rows)+1]] <- data.frame(
        iv = ivlab, outcome = "MID Count (ZINB)", estimate = r$estimate,
        lo = r$conf.low, hi = r$conf.high, stringsAsFactors = FALSE)
    }
    fdf <- do.call(rbind, forest_rows)
    fdf$outcome <- factor(fdf$outcome, levels = c("Hostility (OLogit)", "Initiation (Logit)", "MID Count (ZINB)"))
    fdf$iv <- factor(fdf$iv, levels = rev(unique(fdf$iv)))
    
    fig_A1 <- ggplot2::ggplot(fdf, ggplot2::aes(x = estimate, y = iv, color = outcome, shape = outcome)) +
      ggplot2::geom_vline(xintercept = 0, linetype = "dashed", color = "gray50", linewidth = 0.5) +
      ggplot2::geom_errorbarh(ggplot2::aes(xmin = lo, xmax = hi),
                              height = 0.2, linewidth = 1.1, position = ggplot2::position_dodge(width = 0.5)) +
      ggplot2::geom_point(size = 3.5, position = ggplot2::position_dodge(width = 0.5)) +
      ggplot2::scale_color_manual(values = c(COL_OL, COL_LG, COL_ZI)) +
      ggplot2::scale_shape_manual(values = c(16, 17, 15)) +
      ggplot2::labs(title = "Figure A1. Baseline Coefficient Estimates Across All DVs",
                    subtitle = "Point estimates with 95% confidence intervals",
                    x = "Coefficient Estimate", y = NULL) +
      theme_pub(11) + ggplot2::theme(legend.position = "bottom")
    fig_paths <- c(fig_paths, save_fig(fig_A1, "FigA1_Coefficient_Forest.png", w = 10, h = 6))
  }, error = function(e) message("  [WARN] Figure A1: ", conditionMessage(e)))
  
  message("  Figure A2: Predicted hostility probs...")
  tryCatch({
    nd0 <- nd1 <- as.data.frame(lapply(stats::model.frame(m_ol_1)[, -1, drop = FALSE],
                                       function(x) if(is.numeric(x)) rep(mean(x, na.rm=TRUE), 2) else rep(x[1], 2)))
    nd0$troop_binary <- 0; nd1$troop_binary <- 1
    pr0 <- predict(m_ol_1, newdata = nd0, type = "probs")[1,]
    pr1 <- predict(m_ol_1, newdata = nd1, type = "probs")[1,]
    plong <- data.frame(
      level = rep(names(pr0), 2),
      prob = c(as.numeric(pr0), as.numeric(pr1)),
      scenario = rep(c("No Troops", "Troops Sent"), each = length(pr0)),
      stringsAsFactors = FALSE)
    plong$scenario <- factor(plong$scenario, levels = c("No Troops", "Troops Sent"))
    plong$level <- factor(plong$level, levels = names(pr0))
    
    fig_A2 <- ggplot2::ggplot(plong, ggplot2::aes(x = level, y = prob, fill = scenario)) +
      ggplot2::geom_col(position = ggplot2::position_dodge(0.75), width = 0.7,
                        color = "black", linewidth = 0.3) +
      ggplot2::scale_fill_manual(values = c(COL_NOTROOPS, COL_TROOPS)) +
      ggplot2::scale_y_continuous(expand = c(0, 0)) +
      ggplot2::labs(title = "Figure A2. Predicted Hostility Probabilities",
                    subtitle = "Troop deployment vs. no deployment (OLogit, full specification)",
                    x = "Hostility Level", y = "Predicted Probability") +
      theme_pub(10) + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 25, hjust = 1))
    fig_paths <- c(fig_paths, save_fig(fig_A2, "FigA2_Hostility_Probs.png", w = 10, h = 6))
  }, error = function(e) message("  [WARN] Figure A2: ", conditionMessage(e)))
  
  message("  Figure A3: Initiation predicted probs...")
  tryCatch({
    init_df <- lapply(seq_along(IVS_safe), function(idx) {
      iv <- IVS_safe[idx]
      m <- switch(idx, m_lg_1, m_lg_2, m_lg_3)
      mf <- stats::model.frame(m)[, -1, drop = FALSE]
      nd_base <- as.data.frame(lapply(mf, function(x)
        if(is.numeric(x)) mean(x, na.rm=TRUE) else x[1]))[rep(1,1),,drop=FALSE]
      if (iv == "troop_binary") {
        nd0 <- nd_base; nd0[[iv]] <- 0; nd1 <- nd_base; nd1[[iv]] <- 1
      } else {
        r <- range(d_T2[[iv]], na.rm = TRUE)
        nd0 <- nd_base; nd0[[iv]] <- r[1]; nd1 <- nd_base; nd1[[iv]] <- r[2]
      }
      p0 <- predict(m, newdata = nd0, type = "response")
      p1 <- predict(m, newdata = nd1, type = "response")
      data.frame(iv = label_term(iv),
                 scenario = c("Low/Off", "High/On"),
                 prob = c(p0, p1), stringsAsFactors = FALSE)
    })
    idf <- do.call(rbind, init_df)
    idf$scenario <- factor(idf$scenario, levels = c("Low/Off", "High/On"))
    
    fig_A3 <- ggplot2::ggplot(idf, ggplot2::aes(x = iv, y = prob, fill = scenario)) +
      ggplot2::geom_col(position = ggplot2::position_dodge(0.7), width = 0.6,
                        color = "black", linewidth = 0.3) +
      ggplot2::scale_fill_manual(values = c(COL_NOTROOPS, COL_TROOPS)) +
      ggplot2::scale_y_continuous(expand = c(0, 0)) +
      ggplot2::labs(title = "Figure A3. Predicted Initiation Probability by PK Measure",
                    x = NULL, y = "Pr(Initiation = 1)") +
      theme_pub(11) + ggplot2::theme(legend.position = "bottom")
    fig_paths <- c(fig_paths, save_fig(fig_A3, "FigA3_Initiation_Probs.png", w = 9, h = 6))
  }, error = function(e) message("  [WARN] Figure A3: ", conditionMessage(e)))
  
  message("  Figure A4: ZINB expected counts...")
  tryCatch({
    if (inherits(m_z_1, "zeroinfl")) {
      cnt_names <- names(stats::coef(m_z_1, "count"))[-1]
      cnt_names <- cnt_names[cnt_names %in% names(d_T3)]
      nd_z <- as.data.frame(lapply(d_T3[, cnt_names, drop = FALSE],
                                   function(x) if(is.numeric(x)) mean(x, na.rm=TRUE) else x[1]))[rep(1,1),,drop=FALSE]
      nd_z0 <- nd_z; nd_z0$troop_binary <- 0
      nd_z1 <- nd_z; nd_z1$troop_binary <- 1
      c0 <- predict(m_z_1, newdata = nd_z0, type = "response")
      c1 <- predict(m_z_1, newdata = nd_z1, type = "response")
      zdf <- data.frame(scenario = factor(c("No Troops", "Troops Sent"),
                                          levels = c("No Troops", "Troops Sent")), count = c(c0, c1))
      
      fig_A4 <- ggplot2::ggplot(zdf, ggplot2::aes(x = scenario, y = count, fill = scenario)) +
        ggplot2::geom_col(width = 0.55, color = "black", linewidth = 0.5) +
        ggplot2::scale_fill_manual(values = c(COL_NOTROOPS, COL_TROOPS)) +
        ggplot2::scale_y_continuous(expand = c(0, 0)) +
        ggplot2::labs(title = "Figure A4. Expected MID Count by Troop Deployment",
                      subtitle = "ZINB model, full specification",
                      x = NULL, y = "Expected MID Count") +
        theme_pub(11) + ggplot2::theme(legend.position = "none")
      fig_paths <- c(fig_paths, save_fig(fig_A4, "FigA4_ZINB_Expected_Count.png", w = 8, h = 6))
    }
  }, error = function(e) message("  [WARN] Figure A4: ", conditionMessage(e)))
  
  message("  Figure A5: Zero-inflation distribution...")
  tryCatch({
    if (inherits(m_z_1, "zeroinfl")) {
      zi_prob <- predict(m_z_1, type = "zero")
      zidf <- data.frame(zi_prob = zi_prob)
      fig_A5 <- ggplot2::ggplot(zidf, ggplot2::aes(x = zi_prob)) +
        ggplot2::geom_histogram(bins = 50, fill = COL_ZI, color = "white",
                                alpha = 0.85, linewidth = 0.2) +
        ggplot2::geom_vline(xintercept = mean(zi_prob, na.rm = TRUE),
                            linetype = "dashed", color = "red", linewidth = 0.8) +
        ggplot2::labs(title = "Figure A5. Distribution of Structural-Zero Probabilities",
                      subtitle = paste0("ZINB Model. Mean = ", formatC(mean(zi_prob, na.rm=TRUE), format="f", digits=3)),
                      x = "Pr(Structural Zero)", y = "Count") +
        theme_pub(11)
      fig_paths <- c(fig_paths, save_fig(fig_A5, "FigA5_ZeroInflation_Dist.png", w = 9, h = 6))
    }
  }, error = function(e) message("  [WARN] Figure A5: ", conditionMessage(e)))
  
  message("  Figure A6: Time trends...")
  tryCatch({
    d0_fig <- d0
    d0_fig$yr <- if ("year" %in% names(d0)) as.numeric(d0$year) else as.numeric(d0$time) + 1989
    yr_df <- d0_fig %>%
      dplyr::group_by(yr) %>%
      dplyr::summarize(
        pct_troops = mean(as.numeric(troop_binary), na.rm = TRUE),
        mean_mid = mean(as.numeric(mid_sum), na.rm = TRUE),
        mean_host = mean(as.numeric(hostlev), na.rm = TRUE),
        .groups = "drop")
    yr_long <- tidyr::pivot_longer(yr_df, cols = c(pct_troops, mean_mid, mean_host),
                                   names_to = "variable", values_to = "value")
    yr_long$variable <- dplyr::case_when(
      yr_long$variable == "pct_troops" ~ "% PK Deployed",
      yr_long$variable == "mean_mid" ~ "Mean MID Count",
      yr_long$variable == "mean_host" ~ "Mean Hostility",
      TRUE ~ yr_long$variable)
    
    fig_A6 <- ggplot2::ggplot(yr_long, ggplot2::aes(x = yr, y = value, color = variable)) +
      ggplot2::geom_line(linewidth = 1.1) + ggplot2::geom_point(size = 2) +
      ggplot2::scale_color_manual(values = c(COL_POINT_1, COL_POINT_2, COL_POINT_3)) +
      ggplot2::labs(title = "Figure A6. PK Deployment and MID Trends Over Time",
                    subtitle = "Country-year panel means, 1990\u20132014",
                    x = "Year", y = "Value (Panel Mean)") +
      theme_pub(11) + ggplot2::theme(legend.position = "bottom")
    fig_paths <- c(fig_paths, save_fig(fig_A6, "FigA6_Time_Trends.png", w = 10, h = 6))
  }, error = function(e) message("  [WARN] Figure A6: ", conditionMessage(e)))
  
  message("  Figure A7: Hostility distribution...")
  tryCatch({
    hdf <- d_T1 %>% dplyr::mutate(
      pk = factor(ifelse(troop_binary == 1, "PK Deployed", "No PK"),
                  levels = c("No PK", "PK Deployed"))) %>%
      dplyr::group_by(pk, hostlev_f) %>%
      dplyr::summarize(n = dplyr::n(), .groups = "drop") %>%
      dplyr::group_by(pk) %>% dplyr::mutate(pct = n / sum(n))
    
    fig_A7 <- ggplot2::ggplot(hdf, ggplot2::aes(x = hostlev_f, y = pct, fill = pk)) +
      ggplot2::geom_col(position = ggplot2::position_dodge(0.8), width = 0.7,
                        color = "black", linewidth = 0.3) +
      ggplot2::scale_fill_manual(values = c(COL_NOTROOPS, COL_TROOPS)) +
      ggplot2::scale_y_continuous(labels = scales::percent_format(1), expand = c(0, 0)) +
      ggplot2::labs(title = "Figure A7. Hostility Level Distribution by PK Status",
                    x = "Hostility Level", y = "Proportion of Observations") +
      theme_pub(10) +
      ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 25, hjust = 1),
                     legend.position = "bottom")
    fig_paths <- c(fig_paths, save_fig(fig_A7, "FigA7_Hostility_Distribution.png", w = 10, h = 6))
  }, error = function(e) message("  [WARN] Figure A7: ", conditionMessage(e)))
  
  message("  Figure A8: Specification curve...")
  tryCatch({
    spec_rows <- list()
    spec_id <- 0
    spec_models <- list(
      list(lab = "OLogit (Baseline)", est_fn = function() {
        r <- t1_1[t1_1$term == "troop_binary",]; c(r$estimate, r$conf.low, r$conf.high) }),
      list(lab = "Logit OR (Baseline)", est_fn = function() {
        r <- t2_1[t2_1$term == "troop_binary",]; c(r$estimate, r$conf.low, r$conf.high) }),
      list(lab = "ZINB Count (Baseline)", est_fn = function() {
        r <- t3_1[t3_1$term == "troop_binary",]; c(r$estimate, r$conf.low, r$conf.high) })
    )
    if (!is.null(ft_B6)) spec_models <- c(spec_models, list(
      list(lab = "PSM OLogit", est_fn = function() {
        if (exists("psm_ol_td") && length(psm_ol_td) >= 1) {
          r <- psm_ol_td[[1]][psm_ol_td[[1]]$term == "troop_binary",]
          if (NROW(r)) c(r$estimate, r$conf.low, r$conf.high) else rep(NA, 3)
        } else rep(NA, 3) })))
    if (!is.null(ft_B9)) spec_models <- c(spec_models, list(
      list(lab = "MI OLogit", est_fn = function() {
        if (exists("mi_ol_td") && length(mi_ol_td) >= 1) {
          r <- mi_ol_td[[1]][mi_ol_td[[1]]$term == "troop_binary",]
          if (NROW(r)) c(r$estimate, r$conf.low, r$conf.high) else rep(NA, 3)
        } else rep(NA, 3) })))
    spec_models <- c(spec_models, list(
      list(lab = "IPW OLogit", est_fn = function() {
        tryCatch({
          m <- MASS::polr(stats::as.formula(paste0("hostlev_f ~ ", rhs_full("troop_binary"))),
                          data = d_T1, weights = d_T1$ipw_B, method = "logistic", Hess = TRUE, model = TRUE)
          td <- tidy_polr_robust(m)
          r <- td[td$term == "troop_binary",]
          if (NROW(r)) c(r$estimate, r$conf.low, r$conf.high) else rep(NA, 3)
        }, error = function(e) rep(NA, 3)) }),
      list(lab = "Placebo (t+1)", est_fn = function() {
        if (length(plac_ol_td) >= 1) {
          r <- plac_ol_td[[1]][plac_ol_td[[1]]$term == "troop_binary_lead",]
          if (NROW(r)) c(r$estimate, r$conf.low, r$conf.high) else rep(NA, 3)
        } else rep(NA, 3) })
    ))
    
    for (sp in spec_models) {
      spec_id <- spec_id + 1
      vals <- tryCatch(sp$est_fn(), error = function(e) rep(NA, 3))
      if (length(vals) == 3 && !all(is.na(vals)))
        spec_rows[[length(spec_rows)+1]] <- data.frame(
          id = spec_id, model = sp$lab,
          estimate = vals[1], lo = vals[2], hi = vals[3], stringsAsFactors = FALSE)
    }
    sdf <- do.call(rbind, spec_rows)
    sdf <- sdf[order(sdf$estimate), ]
    sdf$rank <- seq_len(nrow(sdf))
    
    fig_A8 <- ggplot2::ggplot(sdf, ggplot2::aes(x = rank, y = estimate)) +
      ggplot2::geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
      ggplot2::geom_errorbar(ggplot2::aes(ymin = lo, ymax = hi), width = 0.3,
                             linewidth = 0.8, color = "gray40") +
      ggplot2::geom_point(size = 3, color = COL_POINT_1) +
      ggplot2::scale_x_continuous(breaks = sdf$rank, labels = sdf$model) +
      ggplot2::labs(title = "Figure A8. Specification Curve: Troop Binary Effect",
                    subtitle = "Coefficient estimates across model specifications",
                    x = NULL, y = "Coefficient Estimate") +
      theme_pub(10) + ggplot2::coord_flip()
    fig_paths <- c(fig_paths, save_fig(fig_A8, "FigA8_Specification_Curve.png", w = 10, h = 7))
  }, error = function(e) message("  [WARN] Figure A8: ", conditionMessage(e)))
  
  message("  Figure A9: Placebo coefficient plot...")
  tryCatch({
    plac_rows <- list()
    for (iv in IVS_safe) {
      lead_iv <- paste0(iv, "_lead")
      idx <- match(iv, IVS_safe)
      ivlab <- label_term(iv)
      r <- plac_ol_td[[idx]][plac_ol_td[[idx]]$term == lead_iv, ]
      if (NROW(r)) plac_rows[[length(plac_rows)+1]] <- data.frame(iv = ivlab,
                                                                  outcome = "OLogit", estimate = r$estimate, lo = r$conf.low, hi = r$conf.high)
      r <- plac_lg_td[[idx]][plac_lg_td[[idx]]$term == lead_iv, ]
      if (NROW(r)) plac_rows[[length(plac_rows)+1]] <- data.frame(iv = ivlab,
                                                                  outcome = "Logit", estimate = r$estimate, lo = r$conf.low, hi = r$conf.high)
      r <- plac_z_td[[idx]][plac_z_td[[idx]]$term == lead_iv, ]
      if (NROW(r)) plac_rows[[length(plac_rows)+1]] <- data.frame(iv = ivlab,
                                                                  outcome = "ZINB", estimate = r$estimate, lo = r$conf.low, hi = r$conf.high)
    }
    pdf <- do.call(rbind, plac_rows)
    pdf$outcome <- factor(pdf$outcome, levels = c("OLogit", "Logit", "ZINB"))
    pdf$iv <- factor(pdf$iv, levels = rev(unique(pdf$iv)))
    
    fig_A9 <- ggplot2::ggplot(pdf, ggplot2::aes(x = estimate, y = iv, color = outcome, shape = outcome)) +
      ggplot2::geom_vline(xintercept = 0, linetype = "dashed", color = "gray50", linewidth = 0.5) +
      ggplot2::geom_errorbarh(ggplot2::aes(xmin = lo, xmax = hi),
                              height = 0.2, linewidth = 1.0, position = ggplot2::position_dodge(0.4)) +
      ggplot2::geom_point(size = 3.5, position = ggplot2::position_dodge(0.4)) +
      ggplot2::scale_color_manual(values = c(COL_OL, COL_LG, COL_ZI)) +
      ggplot2::scale_shape_manual(values = c(16, 17, 15)) +
      ggplot2::labs(title = "Figure A9. Placebo Test: Lead PK (t+1) Coefficient Estimates",
                    subtitle = "Expectation: all CIs include zero",
                    x = "Coefficient Estimate", y = NULL) +
      theme_pub(11) + ggplot2::theme(legend.position = "bottom")
    fig_paths <- c(fig_paths, save_fig(fig_A9, "FigA9_Placebo_Coefficients.png", w = 10, h = 6))
  }, error = function(e) message("  [WARN] Figure A9: ", conditionMessage(e)))
  
  message("  Figure A10: MID count distribution...")
  tryCatch({
    mid_tab <- as.data.frame(table(d_T3$mid_sum), stringsAsFactors = FALSE)
    names(mid_tab) <- c("count", "freq")
    mid_tab$count <- as.integer(mid_tab$count)
    mid_tab$pct <- mid_tab$freq / sum(mid_tab$freq)
    zero_pct <- mid_tab$pct[mid_tab$count == 0]
    
    fig_A10 <- ggplot2::ggplot(mid_tab, ggplot2::aes(x = count, y = pct)) +
      ggplot2::geom_col(fill = ifelse(mid_tab$count == 0, COL_POINT_2, COL_POINT_1),
                        color = "white", linewidth = 0.3) +
      ggplot2::annotate("text", x = 0, y = zero_pct + 0.02,
                        label = paste0(formatC(100*zero_pct, format = "f", digits = 1), "% zeros"),
                        fontface = "bold", size = 4, family = BASE_FAMILY) +
      ggplot2::scale_y_continuous(labels = scales::percent_format(1), expand = c(0, 0)) +
      ggplot2::labs(title = "Figure A10. Distribution of MID Counts",
                    subtitle = paste0("Analytic sample (N = ", formatC(nrow(d_T3), big.mark = ","), ")"),
                    x = "MID Count (mid_sum)", y = "Proportion") +
      theme_pub(11)
    fig_paths <- c(fig_paths, save_fig(fig_A10, "FigA10_MID_Distribution.png", w = 9, h = 6))
  }, error = function(e) message("  [WARN] Figure A10: ", conditionMessage(e)))
  
  message("  Section K complete: ", length(fig_paths), " figures generated.")
  
}

# Section 11. Export
message("\n[Writing DOCX]...")

supp_path <- file.path(SUPP_DIR, "PKMID_JCR_Online_Appendix.docx")
sdoc <- officer::read_docx()

add_sec <- function(doc, heading, desc, tables) {
  doc <- officer::body_add_par(doc, heading, style = "heading 1")
  if (nzchar(desc)) doc <- officer::body_add_par(doc, desc, style = "Normal")
  doc <- officer::body_add_par(doc, "", style = "Normal")
  for (ft in tables) {
    if (!is.null(ft)) {
      doc <- tryCatch(flextable::body_add_flextable(doc, ft),
                      error = function(e) { message("  [WARN] Could not add table: ", conditionMessage(e)); doc })
      doc <- officer::body_add_par(doc, "", style = "Normal")
      doc <- officer::body_add_break(doc)
    }
  }
  doc
}

ft_C1 <- NULL
tryCatch({
  d_biv <- d0
  d_biv <- d_biv[order(d_biv$cow, d_biv$year), , drop = FALSE]
  d_biv$cow <- as.integer(d_biv$cow)
  d_biv$year <- as.integer(d_biv$year)
  
  biv_zscore <- function(x) {
    mu <- mean(x, na.rm = TRUE)
    s <- sd(x, na.rm = TRUE)
    if (is.na(s) || s == 0) return(x)
    (x - mu) / s
  }
  
  if (!"peace_sq" %in% names(d_biv) && "logged_pk5years" %in% names(d_biv))
    d_biv$peace_sq <- d_biv$logged_pk5years^2
  if (!"ln_pop" %in% names(d_biv)) {
    if ("pop" %in% names(d_biv)) d_biv$ln_pop <- log(pmax(as.numeric(d_biv$pop), 1e-8))
    else if ("population" %in% names(d_biv)) d_biv$ln_pop <- log(pmax(as.numeric(d_biv$population), 1e-8))
  }
  if (!"polity2" %in% names(d_biv) && "democracy_polity2" %in% names(d_biv))
    d_biv$polity2 <- d_biv$democracy_polity2
  
  d_biv <- dplyr::group_by(d_biv, cow) |>
    dplyr::mutate(
      lag_ln_max = dplyr::lag(ln_max_total_peacekeeper, 1),
      lag_troop_binary = dplyr::lag(troop_binary, 1),
      lag_peace_sq_raw = dplyr::lag(peace_sq, 1)
    ) |>
    dplyr::ungroup()
  
  d_biv$z_lag_ln_max <- biv_zscore(d_biv$lag_ln_max)
  d_biv$z_peace_sq <- biv_zscore(d_biv$peace_sq)
  d_biv$z_lag_peace_sq <- biv_zscore(d_biv$lag_peace_sq_raw)
  d_biv$ihs_mid <- asinh(d_biv$mid_sum)
  
  biv_instruments <- intersect(c("lag_mid_bin", "ln_capability", "trade_openness",
                                 "igo_membership", "ln_pop", "polity2"), names(d_biv))
  IVS_M1 <- c("troop_binary", "z_lag_ln_max", "z_peace_sq")
  IVS_M3 <- c("lag_troop_binary", "z_lag_ln_max", "z_lag_peace_sq")
  biv_iv_labels <- c("Troop Contribution\n(Binary)", "Max Troops t−1\n(z-scored)", "Cumul. Exposure²\n(z-scored)")
  biv_dvs <- c("hostlev", "initiator", "ihs_mid")
  biv_dv_labels <- c(hostlev = "Hostility Level (0–5)", initiator = "Conflict Initiation", ihs_mid = "MID Frequency — IHS(mid_sum)")
  
  biv_get_coef <- function(model, cluster_var, term) {
    b <- stats::coef(model)
    V <- tryCatch(sandwich::vcovCL(model, cluster = cluster_var, type = "HC1"),
                  error = function(e) stats::vcov(model))
    se <- sqrt(pmax(diag(V), 0))
    pv <- 2 * stats::pnorm(abs(b / se), lower.tail = FALSE)
    idx <- which(names(b) == term)
    if (!length(idx)) return(NULL)
    list(b = b[idx], se = se[idx], p = pv[idx],
         lo = b[idx] - 1.96 * se[idx], hi = b[idx] + 1.96 * se[idx],
         n = stats::nobs(model),
         ll = tryCatch(as.numeric(stats::logLik(model)), error = function(e) NA_real_))
  }
  
  biv_fmt <- function(b, se, p, lo, hi) {
    if (any(is.na(c(b, se, p)))) return("—")
    paste0(formatC(b, format = "f", digits = 3), stars(p), "\n(",
           formatC(se, format = "f", digits = 3), ")\n[",
           formatC(lo, format = "f", digits = 3), ", ",
           formatC(hi, format = "f", digits = 3), "]")
  }
  
  biv_lpm_fe <- function(dv, iv) {
    sub <- d_biv[stats::complete.cases(d_biv[, c(dv, iv, "cow")]), ]
    if (!nrow(sub)) return(NULL)
    m <- stats::lm(stats::as.formula(paste(dv, "~", iv, "+ factor(cow)")), data = sub)
    biv_get_coef(m, sub$cow, iv)
  }
  
  biv_lpm_cf <- function(dv, iv) {
    req <- unique(c("cow", iv, dv, biv_instruments))
    sub <- d_biv[stats::complete.cases(d_biv[, req]), ]
    if (!nrow(sub)) return(NULL)
    fs <- stats::lm(stats::as.formula(paste(iv, "~", paste(c(biv_instruments, "factor(cow)"), collapse = " + "))), data = sub)
    sub$cf_term <- stats::residuals(fs)
    m <- stats::lm(stats::as.formula(paste(dv, "~", iv, "+ cf_term + factor(cow)")), data = sub)
    biv_get_coef(m, sub$cow, iv)
  }
  
  biv_results <- list()
  for (i in seq_along(IVS_M1)) {
    iv_m1 <- IVS_M1[i]
    iv_m3 <- IVS_M3[i]
    for (dv in biv_dvs) {
      biv_results[[paste("M1", iv_m1, dv, sep = "|")]] <- biv_lpm_fe(dv, iv_m1)
      biv_results[[paste("M2", iv_m1, dv, sep = "|")]] <- biv_lpm_cf(dv, iv_m1)
      biv_results[[paste("M3", iv_m3, dv, sep = "|")]] <- biv_lpm_fe(dv, iv_m3)
    }
  }
  
  biv_cell <- function(model, iv_idx, dv) {
    iv_key <- if (model == "M3") IVS_M3[iv_idx] else IVS_M1[iv_idx]
    r <- biv_results[[paste(model, iv_key, dv, sep = "|")]]
    if (is.null(r)) return("—")
    biv_fmt(r$b, r$se, r$p, r$lo, r$hi)
  }
  biv_get_n <- function(model, iv_idx, dv) {
    iv_key <- if (model == "M3") IVS_M3[iv_idx] else IVS_M1[iv_idx]
    r <- biv_results[[paste(model, iv_key, dv, sep = "|")]]
    if (is.null(r)) "—" else formatC(r$n, format = "d", big.mark = ",")
  }
  biv_get_ll <- function(model, iv_idx, dv) {
    iv_key <- if (model == "M3") IVS_M3[iv_idx] else IVS_M1[iv_idx]
    r <- biv_results[[paste(model, iv_key, dv, sep = "|")]]
    if (is.null(r) || is.na(r$ll)) "—" else formatC(r$ll, format = "f", digits = 1)
  }
  
  COL1 <- "Dependent Variable / Model"
  COL2 <- biv_iv_labels[1]
  COL3 <- biv_iv_labels[2]
  COL4 <- biv_iv_labels[3]
  
  biv_build_row <- function(label, model, dv_key) {
    tibble::tibble(
      !!COL1 := label,
      !!COL2 := biv_cell(model, 1, dv_key),
      !!COL3 := biv_cell(model, 2, dv_key),
      !!COL4 := biv_cell(model, 3, dv_key)
    )
  }
  
  table_rows <- list()
  for (dv_key in biv_dvs) {
    table_rows[[paste0(dv_key, "_hdr")]] <- tibble::tibble(!!COL1 := biv_dv_labels[dv_key], !!COL2 := "", !!COL3 := "", !!COL4 := "")
    table_rows[[paste0(dv_key, "_m1")]] <- biv_build_row("  Baseline Bivariate (M1)", "M1", dv_key)
    table_rows[[paste0(dv_key, "_m2")]] <- biv_build_row("  Selection CF (M2)", "M2", dv_key)
    table_rows[[paste0(dv_key, "_m3")]] <- biv_build_row("  Reverse Causality — Lag t−1 (M3)", "M3", dv_key)
    table_rows[[paste0(dv_key, "_n")]] <- tibble::tibble(
      !!COL1 := "  N: M1 / M2 / M3",
      !!COL2 := paste(biv_get_n("M1", 1, dv_key), biv_get_n("M2", 1, dv_key), biv_get_n("M3", 1, dv_key), sep = " / "),
      !!COL3 := paste(biv_get_n("M1", 2, dv_key), biv_get_n("M2", 2, dv_key), biv_get_n("M3", 2, dv_key), sep = " / "),
      !!COL4 := paste(biv_get_n("M1", 3, dv_key), biv_get_n("M2", 3, dv_key), biv_get_n("M3", 3, dv_key), sep = " / ")
    )
    table_rows[[paste0(dv_key, "_ll")]] <- tibble::tibble(
      !!COL1 := "  Log-Likelihood: M1 / M2 / M3",
      !!COL2 := paste(biv_get_ll("M1", 1, dv_key), biv_get_ll("M2", 1, dv_key), biv_get_ll("M3", 1, dv_key), sep = " / "),
      !!COL3 := paste(biv_get_ll("M1", 2, dv_key), biv_get_ll("M2", 2, dv_key), biv_get_ll("M3", 2, dv_key), sep = " / "),
      !!COL4 := paste(biv_get_ll("M1", 3, dv_key), biv_get_ll("M2", 3, dv_key), biv_get_ll("M3", 3, dv_key), sep = " / ")
    )
  }
  
  biv_df <- dplyr::bind_rows(table_rows)
  is_dv_hdr <- function(x) x %in% unname(biv_dv_labels)
  is_m2_row <- function(x) grepl("Selection CF", x)
  is_m3_row <- function(x) grepl("Reverse Causality", x)
  is_stat <- function(x) grepl("^  N:|^  Log-", x)
  
  b_thick <- officer::fp_border(color = "black", width = 1.8)
  b_mid <- officer::fp_border(color = "black", width = 0.9)
  b_light <- officer::fp_border(color = "#999999", width = 0.4)
  
  ft_C1 <- flextable::flextable(biv_df) |>
    flextable::set_header_labels(values = setNames(list("Dependent Variable / Model", biv_iv_labels[1], biv_iv_labels[2], biv_iv_labels[3]),
                                                   c(COL1, COL2, COL3, COL4))) |>
    flextable::font(fontname = "Times New Roman", part = "all") |>
    flextable::fontsize(size = 9.5, part = "header") |>
    flextable::fontsize(size = 9, part = "body") |>
    flextable::align(align = "left", j = 1, part = "all") |>
    flextable::align(align = "center", j = 2:4, part = "all") |>
    flextable::border_remove() |>
    flextable::hline_top(border = b_thick, part = "header") |>
    flextable::hline(border = b_mid, part = "header") |>
    flextable::hline_bottom(border = b_thick, part = "body") |>
    flextable::bold(i = ~ is_dv_hdr(get(COL1)), part = "body") |>
    flextable::bg(i = ~ is_dv_hdr(get(COL1)), bg = "#E4E4E4", part = "body") |>
    flextable::hline(i = ~ is_dv_hdr(get(COL1)), border = b_mid, part = "body") |>
    flextable::italic(i = ~ is_m2_row(get(COL1)), part = "body") |>
    flextable::italic(i = ~ is_m3_row(get(COL1)), part = "body") |>
    flextable::fontsize(i = ~ is_stat(get(COL1)), size = 7.5, part = "body") |>
    flextable::color(i = ~ is_stat(get(COL1)), color = "#555555", part = "body") |>
    flextable::italic(i = ~ is_stat(get(COL1)), part = "body") |>
    flextable::hline(i = ~ is_stat(get(COL1)), border = b_light, part = "body") |>
    flextable::padding(padding.top = 6, padding.bottom = 6, part = "all") |>
    flextable::padding(i = ~ is_dv_hdr(get(COL1)), padding.top = 8, padding.bottom = 4, part = "body") |>
    flextable::padding(i = ~ is_stat(get(COL1)), padding.top = 2, padding.bottom = 2, part = "body") |>
    flextable::width(j = 1, width = 2.4) |>
    flextable::width(j = 2:4, width = 1.7) |>
    flextable::line_spacing(space = 1.1, part = "body") |>
    flextable::add_header_lines(values = "Table C1. Bivariate Robustness Checks") |>
    flextable::bold(i = 1, part = "header") |>
    flextable::fontsize(i = 1, size = 11, part = "header") |>
    flextable::align(i = 1, align = "left", part = "header")
  
  c1_note <- paste0(
    "Note: All models are bivariate, including only the focal peacekeeping variable and country fixed effects. ",
    "M1 is the baseline bivariate specification, M2 applies a control-function correction, and M3 lags the peacekeeping variables one year. ",
    "Cells report coefficient", stars(0.01), " (clustered SE) [95% CI]."
  )
  ft_C1 <- flextable::add_footer_lines(ft_C1, values = c1_note) |>
    flextable::italic(part = "footer") |>
    flextable::fontsize(size = 9, part = "footer") |>
    flextable::align(align = "left", part = "footer") |>
    flextable::color(color = "gray40", part = "footer")
}, error = function(e) message("  [WARN] Could not build Section C bivariate table: ", conditionMessage(e)))

fig_paths_use <- character(0)
if (exists("fig_paths") && length(fig_paths) > 0) {
  fig_keep <- basename(fig_paths)
  fig_keep <- fig_keep[grepl("^FigA[1-9]_", fig_keep)]
  fig_order <- sprintf("FigA%d", 1:9)
  ord <- order(match(sub("_.*$", "", fig_keep), fig_order))
  fig_paths_use <- file.path(FIG_DIR, fig_keep[ord])
}

sdoc <- officer::body_add_par(sdoc, "Online Appendix", style = "heading 1")
sdoc <- officer::body_add_par(
  sdoc,
  paste0(
    "This online appendix follows the ordering of the final submission document. ",
    "Sections A-K present the supplementary analyses referenced in the main text, ",
    "including descriptive material, baseline models, robustness checks, matching, dynamics, placebo tests, and appendix figures."
  ),
  style = "Normal"
)
sdoc <- officer::body_add_break(sdoc)

sdoc <- add_sec(
  sdoc,
  "Section A: Data, Measures, and Sample",
  "Summary statistics for the full panel and variable-level missing-data accounting.",
  list(ft_A1, ft_A3)
)

sdoc <- add_sec(
  sdoc,
  "Section B: Baseline Results, Causal Identification, and Linear Benchmarks",
  "Baseline ordered logit, logit, and ZINB results; multiple-imputation comparisons; pooled OLS benchmarks; and predicted probability changes.",
  list(ft_B1, ft_B2, ft_B3, ft_B4, ft_B9, ft_B10, ft_B12, ft_B15, ft_B16, ft_B17, ft_B18, ft_B19)
)

sdoc <- add_sec(
  sdoc,
  "Section C: Bivariate Models",
  "Bivariate robustness checks with baseline, selection control-function, and reverse-causality specifications.",
  list(ft_C1)
)

sdoc <- add_sec(
  sdoc,
  "Section D: Panel Estimators and Unobserved Heterogeneity",
  "Country fixed effects, random effects, and two-way fixed-effects specifications.",
  list(ft_D1, ft_D2, ft_D3, ft_D5)
)

sdoc <- add_sec(
  sdoc,
  "Section E: Alternative Estimators",
  "Alternative link functions and count-model comparisons.",
  list(ft_E1, ft_E2, ft_E3, ft_E4, ft_E6)
)

sdoc <- add_sec(
  sdoc,
  "Section F: Endogeneity and Selection",
  "Control-function corrections, stage-one diagnostics, lagged peacekeeping, IPW, correlated random effects, and placebo diagnostics.",
  list(ft_F1, ft_F2, ft_F3, ft_F4, ft_F5, ft_F6, ft_F7, ft_F8, ft_F9)
)

sdoc <- add_sec(
  sdoc,
  "Section G: Robustness and Sensitivity",
  "Listwise-versus-MI comparison, alternative dependent-variable codings, exclusion of P5 countries, and HC1 robust inference.",
  c(list(ft_G1), ft_G_alt[1:6], list(ft_G11, ft_G12))
)

sdoc <- add_sec(
  sdoc,
  "Section H: Matching and Balancing",
  "PSM and CEM balance checks, matched estimates, and comparison with IPW.",
  list(ft_H1, ft_H2, ft_H3, ft_H5)
)

sdoc <- add_sec(
  sdoc,
  "Section I: Dependence, Serial Correlation, and Dynamics",
  "Lagged dependent-variable, Cox duration, and event-time analyses.",
  list(ft_I1, ft_I3, ft_I4)
)

sdoc <- add_sec(
  sdoc,
  "Section J: Placebo Tests (Future Peacekeeping)",
  "Forward-looking placebo tests using one-year leads of the peacekeeping variables.",
  list(ft_J1, ft_J2)
)

sdoc <- officer::body_add_par(sdoc, "Section K: Appendix Figures", style = "heading 1")
sdoc <- officer::body_add_par(
  sdoc,
  "Appendix figures corresponding to the final document order (A1-A9).",
  style = "Normal"
)
sdoc <- officer::body_add_par(sdoc, "", style = "Normal")

if (length(fig_paths_use) > 0) {
  for (fp in fig_paths_use) {
    if (file.exists(fp)) {
      fig_label <- tools::file_path_sans_ext(basename(fp))
      fig_label <- gsub("_", " ", fig_label)
      sdoc <- tryCatch({
        sdoc <- officer::body_add_par(sdoc, fig_label, style = "heading 2")
        sdoc <- officer::body_add_img(sdoc, src = fp, width = 6.5, height = 4.5)
        sdoc <- officer::body_add_par(sdoc, "", style = "Normal")
        sdoc <- officer::body_add_break(sdoc)
        sdoc
      }, error = function(e) {
        message("  [WARN] Could not add figure: ", conditionMessage(e))
        sdoc
      })
    }
  }
} else {
  sdoc <- officer::body_add_par(sdoc, "[Figures not generated — ggplot2 may not be available.]", style = "Normal")
}

print(sdoc, target = supp_path)
message("  Saved: ", normalizePath(supp_path))
message("\n", strrep("=", 70))
message("ONLINE APPENDIX MATCHED SCRIPT COMPLETE")
message("  [OK] Output follows the section and table order of the current Word appendix.")
message("  [OK] Included tables: A1-A2, B1-B12, C1, D1-D4, E1-E5, F1-F9, G1-G9, H1-H4, I1-I3, J1-J2")
message("  [OK] Included figures: A1-A9")
message("  [OK] Output: PKMID_JCR_Online_Appendix.docx")
message(strrep("=", 70))
